Files
RLCR-v4-ks-uniqueness-cov0-…/trainer_state.json
ModelHub XC 1af512c5b4 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-hotpot
Source: Original Platform
2026-04-11 11:51:04 +08:00

11044 lines
696 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.6358064756244601,
"calibration/batch_distribution_entropy": 0.6431098183707868,
"calibration/batch_entropy_100bins": 0.48089187317226323,
"calibration/batch_entropy_10bins": 0.6431098183707868,
"calibration/batch_entropy_50bins": 0.5617938193030543,
"calibration/batch_uniqueness": 0.7219718974960545,
"calibration/confidence_entropy": 0.34696880251966167,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.49592420401806236,
"calibration/mean_confidence": 0.7925940600227801,
"calibration/prompt_uniqueness": 0.5942279192380695,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0345703125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1495.6,
"completions/mean_length": 270.69580078125,
"completions/mean_terminated_length": 225.39390869140624,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.06927429139614105,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0744,
"num_tokens": 17615957.0,
"reward": 0.533476448059082,
"reward_std": 0.4068940103054047,
"rewards/accuracy_reward": 0.219921875,
"rewards/brier_reward": 0.3760594606399536,
"rewards/confidence_uniqueness_reward": 0.48737336993217467,
"rewards/format_reward": 0.68427734375,
"rewards/frontier_aurc_reward": 0.30170206129550936,
"rewards/frontier_coverage_0": 0.30170206129550936,
"rewards/frontier_coverage_1": 0.30170206129550936,
"rewards/frontier_coverage_10": 0.30170206129550936,
"rewards/frontier_coverage_15": 0.30170206129550936,
"rewards/frontier_coverage_20": 0.30170206129550936,
"rewards/frontier_coverage_25": 0.30170206129550936,
"rewards/frontier_coverage_5": 0.30170206129550936,
"rewards/frontier_ece_reward": 0.30170206129550936,
"rewards/frontier_entropy_batch_reward": -0.6530686259269715,
"signal/accuracy_reward/centered_abs_mean": 0.2394775390625,
"signal/accuracy_reward/group_bin_occupancy": 0.21015625,
"signal/accuracy_reward/group_std_mean": 0.28177876472473146,
"signal/accuracy_reward/group_zero_std_frac": 0.31875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11973876953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11973876953125,
"signal/advantage_abs_mean": 0.34517702460289,
"signal/advantage_pre_scale_abs_mean": 0.34517702460289,
"signal/advantage_pre_scale_std": 0.4175687491893768,
"signal/advantage_std": 0.4175687491893768,
"signal/brier_reward/centered_abs_mean": 0.31782959699630736,
"signal/brier_reward/group_bin_occupancy": 0.747265625,
"signal/brier_reward/group_std_mean": 0.3630960941314697,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031782958284020425,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.031782958284020425,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.29565892815589906,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.58359375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3465812742710114,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02956589199602604,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02956589199602604,
"signal/format_reward/centered_abs_mean": 0.399285888671875,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.4503865897655487,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1996429443359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1996429443359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2909155905246735,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.65859375,
"signal/frontier_aurc_reward/group_std_mean": 0.34205764532089233,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_0/centered_abs_mean": 0.2909155905246735,
"signal/frontier_coverage_0/group_bin_occupancy": 0.65859375,
"signal/frontier_coverage_0/group_std_mean": 0.34205764532089233,
"signal/frontier_coverage_0/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_1/centered_abs_mean": 0.2909155905246735,
"signal/frontier_coverage_1/group_bin_occupancy": 0.65859375,
"signal/frontier_coverage_1/group_std_mean": 0.34205764532089233,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_10/centered_abs_mean": 0.2909155905246735,
"signal/frontier_coverage_10/group_bin_occupancy": 0.65859375,
"signal/frontier_coverage_10/group_std_mean": 0.34205764532089233,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_15/centered_abs_mean": 0.2909155905246735,
"signal/frontier_coverage_15/group_bin_occupancy": 0.65859375,
"signal/frontier_coverage_15/group_std_mean": 0.34205764532089233,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_20/centered_abs_mean": 0.2909155905246735,
"signal/frontier_coverage_20/group_bin_occupancy": 0.65859375,
"signal/frontier_coverage_20/group_std_mean": 0.34205764532089233,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_25/centered_abs_mean": 0.2909155905246735,
"signal/frontier_coverage_25/group_bin_occupancy": 0.65859375,
"signal/frontier_coverage_25/group_std_mean": 0.34205764532089233,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_5/centered_abs_mean": 0.2909155905246735,
"signal/frontier_coverage_5/group_bin_occupancy": 0.65859375,
"signal/frontier_coverage_5/group_std_mean": 0.34205764532089233,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036364448722451927,
"signal/frontier_ece_reward/centered_abs_mean": 0.2909155905246735,
"signal/frontier_ece_reward/group_bin_occupancy": 0.65859375,
"signal/frontier_ece_reward/group_std_mean": 0.34205764532089233,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02909155897796154,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02909155897796154,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.424519544839859,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.314453125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.47118043899536133,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042451954632997516,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042451954632997516,
"step": 5
},
{
"calibration/aurc": 0.6633336947681945,
"calibration/batch_distribution_entropy": 0.6530785282030743,
"calibration/batch_entropy_100bins": 0.4852722322513416,
"calibration/batch_entropy_10bins": 0.6530785282030743,
"calibration/batch_entropy_50bins": 0.5661661966634106,
"calibration/batch_uniqueness": 0.7272441743970559,
"calibration/confidence_entropy": 0.3523645170870256,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5257630611304681,
"calibration/mean_confidence": 0.7933062167842394,
"calibration/prompt_uniqueness": 0.6178022073084117,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1488.2,
"completions/mean_length": 261.59072265625,
"completions/mean_terminated_length": 211.972216796875,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.030047137290239334,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0764,
"num_tokens": 35394998.0,
"reward": 0.5451710224151611,
"reward_std": 0.38366069793701174,
"rewards/accuracy_reward": 0.20810546875,
"rewards/brier_reward": 0.3810562252998352,
"rewards/confidence_uniqueness_reward": 0.5187723219394684,
"rewards/format_reward": 0.7197265625,
"rewards/frontier_aurc_reward": 0.3000528335571289,
"rewards/frontier_coverage_0": 0.3000528335571289,
"rewards/frontier_coverage_1": 0.3000528335571289,
"rewards/frontier_coverage_10": 0.3000528335571289,
"rewards/frontier_coverage_15": 0.3000528335571289,
"rewards/frontier_coverage_20": 0.3000528335571289,
"rewards/frontier_coverage_25": 0.3000528335571289,
"rewards/frontier_coverage_5": 0.3000528335571289,
"rewards/frontier_ece_reward": 0.3000528335571289,
"rewards/frontier_entropy_batch_reward": -0.6873842597007751,
"signal/accuracy_reward/centered_abs_mean": 0.216424560546875,
"signal/accuracy_reward/group_bin_occupancy": 0.20703125,
"signal/accuracy_reward/group_std_mean": 0.26217670142650606,
"signal/accuracy_reward/group_zero_std_frac": 0.34375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1082122802734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1082122802734375,
"signal/advantage_abs_mean": 0.3162668466567993,
"signal/advantage_pre_scale_abs_mean": 0.3162668466567993,
"signal/advantage_pre_scale_std": 0.3942062079906464,
"signal/advantage_std": 0.3942062079906464,
"signal/brier_reward/centered_abs_mean": 0.3037886917591095,
"signal/brier_reward/group_bin_occupancy": 0.775390625,
"signal/brier_reward/group_std_mean": 0.3516114354133606,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03037887029349804,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03037887029349804,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.28001424074172976,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.580859375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3388310194015503,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028001424670219422,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.028001424670219422,
"signal/format_reward/centered_abs_mean": 0.37132568359375,
"signal/format_reward/group_bin_occupancy": 0.249609375,
"signal/format_reward/group_std_mean": 0.4337587058544159,
"signal/format_reward/group_zero_std_frac": 0.003125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.185662841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.185662841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.27608999609947205,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.683203125,
"signal/frontier_aurc_reward/group_std_mean": 0.33003708720207214,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_0/centered_abs_mean": 0.27608999609947205,
"signal/frontier_coverage_0/group_bin_occupancy": 0.683203125,
"signal/frontier_coverage_0/group_std_mean": 0.33003708720207214,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_1/centered_abs_mean": 0.27608999609947205,
"signal/frontier_coverage_1/group_bin_occupancy": 0.683203125,
"signal/frontier_coverage_1/group_std_mean": 0.33003708720207214,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_10/centered_abs_mean": 0.27608999609947205,
"signal/frontier_coverage_10/group_bin_occupancy": 0.683203125,
"signal/frontier_coverage_10/group_std_mean": 0.33003708720207214,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_15/centered_abs_mean": 0.27608999609947205,
"signal/frontier_coverage_15/group_bin_occupancy": 0.683203125,
"signal/frontier_coverage_15/group_std_mean": 0.33003708720207214,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_20/centered_abs_mean": 0.27608999609947205,
"signal/frontier_coverage_20/group_bin_occupancy": 0.683203125,
"signal/frontier_coverage_20/group_std_mean": 0.33003708720207214,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_25/centered_abs_mean": 0.27608999609947205,
"signal/frontier_coverage_25/group_bin_occupancy": 0.683203125,
"signal/frontier_coverage_25/group_std_mean": 0.33003708720207214,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_5/centered_abs_mean": 0.27608999609947205,
"signal/frontier_coverage_5/group_bin_occupancy": 0.683203125,
"signal/frontier_coverage_5/group_std_mean": 0.33003708720207214,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003451125044375658,
"signal/frontier_ece_reward/centered_abs_mean": 0.27608999609947205,
"signal/frontier_ece_reward/group_bin_occupancy": 0.683203125,
"signal/frontier_ece_reward/group_std_mean": 0.33003708720207214,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.027609000355005263,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.027609000355005263,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39845545291900636,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3171875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4560263633728027,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03984554782509804,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03984554782509804,
"step": 10
},
{
"calibration/aurc": 0.6056235500133583,
"calibration/batch_distribution_entropy": 0.6372280867706955,
"calibration/batch_entropy_100bins": 0.48009095551927256,
"calibration/batch_entropy_10bins": 0.6372280867706955,
"calibration/batch_entropy_50bins": 0.5595683840082752,
"calibration/batch_uniqueness": 0.7113122520911674,
"calibration/buffer_distribution_entropy": 0.6568801862675887,
"calibration/buffer_entropy_100bins": 0.49209269792202925,
"calibration/buffer_entropy_10bins": 0.6568801862675887,
"calibration/buffer_entropy_50bins": 0.5730805301755447,
"calibration/confidence_entropy": 0.35123976578789656,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.47231641548769077,
"calibration/mean_confidence": 0.804845781710738,
"calibration/prompt_uniqueness": 0.6089974924774788,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.01650390625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1428.2,
"completions/mean_length": 200.514453125,
"completions/mean_terminated_length": 178.22185363769532,
"completions/min_length": 9.6,
"completions/min_terminated_length": 9.6,
"epoch": 0.048,
"grad_norm": 0.05051470175385475,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0486,
"num_tokens": 52497002.0,
"reward": 0.665993869304657,
"reward_std": 0.3059393674135208,
"rewards/accuracy_reward": 0.27236328125,
"rewards/brier_reward": 0.48226693272590637,
"rewards/confidence_uniqueness_reward": 0.6447442531585693,
"rewards/format_reward": 0.8810546875,
"rewards/frontier_aurc_reward": 0.29981047259643673,
"rewards/frontier_coverage_0": 0.3134632341563702,
"rewards/frontier_coverage_1": 0.3134632341563702,
"rewards/frontier_coverage_10": 0.3134632341563702,
"rewards/frontier_coverage_15": 0.3134632341563702,
"rewards/frontier_coverage_20": 0.3134632341563702,
"rewards/frontier_coverage_25": 0.3134632341563702,
"rewards/frontier_coverage_5": 0.3134632341563702,
"rewards/frontier_ece_reward": 0.2883337765932083,
"rewards/frontier_entropy_batch_reward": -0.8342528104782104,
"signal/accuracy_reward/centered_abs_mean": 0.202545166015625,
"signal/accuracy_reward/group_bin_occupancy": 0.207421875,
"signal/accuracy_reward/group_std_mean": 0.2523681789636612,
"signal/accuracy_reward/group_zero_std_frac": 0.340625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1012725830078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1012725830078125,
"signal/advantage_abs_mean": 0.23214915990829468,
"signal/advantage_pre_scale_abs_mean": 0.23214915990829468,
"signal/advantage_pre_scale_std": 0.31858267784118655,
"signal/advantage_std": 0.31858267784118655,
"signal/brier_reward/centered_abs_mean": 0.2716783404350281,
"signal/brier_reward/group_bin_occupancy": 0.81015625,
"signal/brier_reward/group_std_mean": 0.3263775706291199,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02716783434152603,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02716783434152603,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1999937564134598,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.597265625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.26394935250282286,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019999375380575658,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019999375380575658,
"signal/format_reward/centered_abs_mean": 0.19697265625,
"signal/format_reward/group_bin_occupancy": 0.24140625,
"signal/format_reward/group_std_mean": 0.2975906074047089,
"signal/format_reward/group_zero_std_frac": 0.06875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.098486328125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.098486328125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.215498910844326,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625,
"signal/frontier_aurc_reward/group_std_mean": 0.2603446511551738,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0026937363953038586,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0026937363953038586,
"signal/frontier_coverage_0/centered_abs_mean": 0.23299580514431,
"signal/frontier_coverage_0/group_bin_occupancy": 0.70546875,
"signal/frontier_coverage_0/group_std_mean": 0.2876336514949799,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_1/centered_abs_mean": 0.23299580514431,
"signal/frontier_coverage_1/group_bin_occupancy": 0.70546875,
"signal/frontier_coverage_1/group_std_mean": 0.2876336514949799,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_10/centered_abs_mean": 0.23299580514431,
"signal/frontier_coverage_10/group_bin_occupancy": 0.70546875,
"signal/frontier_coverage_10/group_std_mean": 0.2876336514949799,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_15/centered_abs_mean": 0.23299580514431,
"signal/frontier_coverage_15/group_bin_occupancy": 0.70546875,
"signal/frontier_coverage_15/group_std_mean": 0.2876336514949799,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_20/centered_abs_mean": 0.23299580514431,
"signal/frontier_coverage_20/group_bin_occupancy": 0.70546875,
"signal/frontier_coverage_20/group_std_mean": 0.2876336514949799,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_25/centered_abs_mean": 0.23299580514431,
"signal/frontier_coverage_25/group_bin_occupancy": 0.70546875,
"signal/frontier_coverage_25/group_std_mean": 0.2876336514949799,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_5/centered_abs_mean": 0.23299580514431,
"signal/frontier_coverage_5/group_bin_occupancy": 0.70546875,
"signal/frontier_coverage_5/group_std_mean": 0.2876336514949799,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029124475782737135,
"signal/frontier_ece_reward/centered_abs_mean": 0.24313633441925048,
"signal/frontier_ece_reward/group_bin_occupancy": 0.712890625,
"signal/frontier_ece_reward/group_std_mean": 0.29327360093593596,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024313633516430854,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024313633516430854,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26260979771614074,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.334375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3697131097316742,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02626098096370697,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02626098096370697,
"step": 15
},
{
"calibration/aurc": 0.5229047865146916,
"calibration/batch_distribution_entropy": 0.6965577173291834,
"calibration/batch_entropy_100bins": 0.5146396132435052,
"calibration/batch_entropy_10bins": 0.6965577173291834,
"calibration/batch_entropy_50bins": 0.6023062687429694,
"calibration/batch_uniqueness": 0.7574193666928948,
"calibration/buffer_distribution_entropy": 0.657240172374993,
"calibration/buffer_entropy_100bins": 0.49448125756617145,
"calibration/buffer_entropy_10bins": 0.657240172374993,
"calibration/buffer_entropy_50bins": 0.5759482684510908,
"calibration/confidence_entropy": 0.37992020571579327,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3746292271322627,
"calibration/mean_confidence": 0.7828071817671975,
"calibration/prompt_uniqueness": 0.6806407808231312,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00361328125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1317.4,
"completions/mean_length": 137.51865234375,
"completions/mean_terminated_length": 132.4542221069336,
"completions/min_length": 24.4,
"completions/min_terminated_length": 24.4,
"epoch": 0.064,
"grad_norm": 0.008556111715734005,
"learning_rate": 1e-06,
"loss": 0.0126,
"num_tokens": 68823593.0,
"reward": 0.7013731718063354,
"reward_std": 0.2047277569770813,
"rewards/accuracy_reward": 0.3408203125,
"rewards/brier_reward": 0.5739028096199036,
"rewards/confidence_uniqueness_reward": 0.7480879545211792,
"rewards/format_reward": 0.97705078125,
"rewards/frontier_aurc_reward": -0.006883773859590292,
"rewards/frontier_coverage_0": 0.06146884858608246,
"rewards/frontier_coverage_1": 0.06146884858608246,
"rewards/frontier_coverage_10": 0.06146884858608246,
"rewards/frontier_coverage_15": 0.06146884858608246,
"rewards/frontier_coverage_20": 0.06146884858608246,
"rewards/frontier_coverage_25": 0.06146884858608246,
"rewards/frontier_coverage_5": 0.06146884858608246,
"rewards/frontier_ece_reward": -0.050785575062036514,
"rewards/frontier_entropy_batch_reward": -0.8997539043426513,
"signal/accuracy_reward/centered_abs_mean": 0.211279296875,
"signal/accuracy_reward/group_bin_occupancy": 0.20625,
"signal/accuracy_reward/group_std_mean": 0.25800455510616305,
"signal/accuracy_reward/group_zero_std_frac": 0.35,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1056396484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1056396484375,
"signal/advantage_abs_mean": 0.1570647269487381,
"signal/advantage_pre_scale_abs_mean": 0.1570647269487381,
"signal/advantage_pre_scale_std": 0.22092486619949342,
"signal/advantage_std": 0.22092486619949342,
"signal/brier_reward/centered_abs_mean": 0.24524094462394713,
"signal/brier_reward/group_bin_occupancy": 0.844140625,
"signal/brier_reward/group_std_mean": 0.3004362642765045,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02452409528195858,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02452409528195858,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1221130445599556,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6515625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1624012291431427,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012211304530501366,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012211304530501366,
"signal/format_reward/centered_abs_mean": 0.043353271484375,
"signal/format_reward/group_bin_occupancy": 0.190234375,
"signal/format_reward/group_std_mean": 0.10569706857204438,
"signal/format_reward/group_zero_std_frac": 0.478125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0216766357421875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0216766357421875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005248846765607595,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625,
"signal/frontier_aurc_reward/group_std_mean": 0.007484708447009325,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.56105883535929e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.56105883535929e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.10489135384559631,
"signal/frontier_coverage_0/group_bin_occupancy": 0.695703125,
"signal/frontier_coverage_0/group_std_mean": 0.1639949709177017,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_1/centered_abs_mean": 0.10489135384559631,
"signal/frontier_coverage_1/group_bin_occupancy": 0.695703125,
"signal/frontier_coverage_1/group_std_mean": 0.1639949709177017,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_10/centered_abs_mean": 0.10489135384559631,
"signal/frontier_coverage_10/group_bin_occupancy": 0.695703125,
"signal/frontier_coverage_10/group_std_mean": 0.1639949709177017,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_15/centered_abs_mean": 0.10489135384559631,
"signal/frontier_coverage_15/group_bin_occupancy": 0.695703125,
"signal/frontier_coverage_15/group_std_mean": 0.1639949709177017,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_20/centered_abs_mean": 0.10489135384559631,
"signal/frontier_coverage_20/group_bin_occupancy": 0.695703125,
"signal/frontier_coverage_20/group_std_mean": 0.1639949709177017,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_25/centered_abs_mean": 0.10489135384559631,
"signal/frontier_coverage_25/group_bin_occupancy": 0.695703125,
"signal/frontier_coverage_25/group_std_mean": 0.1639949709177017,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_5/centered_abs_mean": 0.10489135384559631,
"signal/frontier_coverage_5/group_bin_occupancy": 0.695703125,
"signal/frontier_coverage_5/group_std_mean": 0.1639949709177017,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013111419510096311,
"signal/frontier_ece_reward/centered_abs_mean": 0.14598130881786348,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7453125,
"signal/frontier_ece_reward/group_std_mean": 0.17359468340873718,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014598131738603115,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014598131738603115,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17333437800407409,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.29973788261413575,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017333437874913215,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017333437874913215,
"step": 20
},
{
"calibration/aurc": 0.6283088834520059,
"calibration/batch_distribution_entropy": 0.8095181952478757,
"calibration/batch_entropy_100bins": 0.6000067690369304,
"calibration/batch_entropy_10bins": 0.8095181952478757,
"calibration/batch_entropy_50bins": 0.6862870972074454,
"calibration/batch_uniqueness": 0.831063874562125,
"calibration/buffer_distribution_entropy": 0.6868147973448078,
"calibration/buffer_entropy_100bins": 0.515115289448944,
"calibration/buffer_entropy_10bins": 0.6868147973448078,
"calibration/buffer_entropy_50bins": 0.5986524147896154,
"calibration/confidence_entropy": 0.45480330324323653,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3972159911131528,
"calibration/mean_confidence": 0.7111007678831154,
"calibration/prompt_uniqueness": 0.7651551820929442,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0013671875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 733.0,
"completions/mean_length": 118.575,
"completions/mean_terminated_length": 116.63536529541015,
"completions/min_length": 35.8,
"completions/min_terminated_length": 35.8,
"epoch": 0.08,
"grad_norm": 0.013943054713308811,
"learning_rate": 1e-06,
"loss": 0.0027,
"num_tokens": 84970953.0,
"reward": 0.735591733455658,
"reward_std": 0.1745338499546051,
"rewards/accuracy_reward": 0.35390625,
"rewards/brier_reward": 0.6262224793434144,
"rewards/confidence_uniqueness_reward": 0.8280656814575196,
"rewards/format_reward": 0.9927734375,
"rewards/frontier_aurc_reward": -0.005787147860974074,
"rewards/frontier_coverage_0": 0.06969771385192872,
"rewards/frontier_coverage_1": 0.06969771385192872,
"rewards/frontier_coverage_10": 0.06969771385192872,
"rewards/frontier_coverage_15": 0.06969771385192872,
"rewards/frontier_coverage_20": 0.06969771385192872,
"rewards/frontier_coverage_25": 0.06969771385192872,
"rewards/frontier_coverage_5": 0.06969771385192872,
"rewards/frontier_ece_reward": -0.04099251367151737,
"rewards/frontier_entropy_batch_reward": -0.8510387659072876,
"signal/accuracy_reward/centered_abs_mean": 0.19227294921875,
"signal/accuracy_reward/group_bin_occupancy": 0.204296875,
"signal/accuracy_reward/group_std_mean": 0.24025425910949708,
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.096136474609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.096136474609375,
"signal/advantage_abs_mean": 0.13553658425807952,
"signal/advantage_pre_scale_abs_mean": 0.13553658425807952,
"signal/advantage_pre_scale_std": 0.19173393845558168,
"signal/advantage_std": 0.19173393845558168,
"signal/brier_reward/centered_abs_mean": 0.2213940680027008,
"signal/brier_reward/group_bin_occupancy": 0.88359375,
"signal/brier_reward/group_std_mean": 0.2728204667568207,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022139406949281692,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.022139406949281692,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07314713597297669,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.72421875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10160589665174484,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0073147137649357315,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0073147137649357315,
"signal/format_reward/centered_abs_mean": 0.01385498046875,
"signal/format_reward/group_bin_occupancy": 0.14921875,
"signal/format_reward/group_std_mean": 0.0368439082056284,
"signal/format_reward/group_zero_std_frac": 0.80625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006927490234375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.006927490234375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035542991012334824,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74453125,
"signal/frontier_aurc_reward/group_std_mean": 0.005189351085573435,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.442873832886107e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.442873832886107e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.13423685133457183,
"signal/frontier_coverage_0/group_bin_occupancy": 0.77578125,
"signal/frontier_coverage_0/group_std_mean": 0.1972368836402893,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_1/centered_abs_mean": 0.13423685133457183,
"signal/frontier_coverage_1/group_bin_occupancy": 0.77578125,
"signal/frontier_coverage_1/group_std_mean": 0.1972368836402893,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_10/centered_abs_mean": 0.13423685133457183,
"signal/frontier_coverage_10/group_bin_occupancy": 0.77578125,
"signal/frontier_coverage_10/group_std_mean": 0.1972368836402893,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_15/centered_abs_mean": 0.13423685133457183,
"signal/frontier_coverage_15/group_bin_occupancy": 0.77578125,
"signal/frontier_coverage_15/group_std_mean": 0.1972368836402893,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_20/centered_abs_mean": 0.13423685133457183,
"signal/frontier_coverage_20/group_bin_occupancy": 0.77578125,
"signal/frontier_coverage_20/group_std_mean": 0.1972368836402893,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_25/centered_abs_mean": 0.13423685133457183,
"signal/frontier_coverage_25/group_bin_occupancy": 0.77578125,
"signal/frontier_coverage_25/group_std_mean": 0.1972368836402893,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_5/centered_abs_mean": 0.13423685133457183,
"signal/frontier_coverage_5/group_bin_occupancy": 0.77578125,
"signal/frontier_coverage_5/group_std_mean": 0.1972368836402893,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016779606696218253,
"signal/frontier_ece_reward/centered_abs_mean": 0.13046298176050186,
"signal/frontier_ece_reward/group_bin_occupancy": 0.812890625,
"signal/frontier_ece_reward/group_std_mean": 0.1628311574459076,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013046298176050186,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013046298176050186,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24598013758659362,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.45390625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38413644433021543,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02459801435470581,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02459801435470581,
"step": 25
},
{
"calibration/aurc": 0.648785096123655,
"calibration/batch_distribution_entropy": 0.9380209005564305,
"calibration/batch_entropy_100bins": 0.7781107329568903,
"calibration/batch_entropy_10bins": 0.9380209005564305,
"calibration/batch_entropy_50bins": 0.850847448214451,
"calibration/batch_uniqueness": 0.9082612624262516,
"calibration/buffer_distribution_entropy": 0.7433120040362949,
"calibration/buffer_entropy_100bins": 0.564207235491536,
"calibration/buffer_entropy_10bins": 0.7433120040362949,
"calibration/buffer_entropy_50bins": 0.6484998987649865,
"calibration/confidence_entropy": 0.5158145753859638,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.29242145964555044,
"calibration/mean_confidence": 0.5788182740863752,
"calibration/prompt_uniqueness": 0.8489781667317032,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0017578125,
"completions/max_length": 1433.0,
"completions/max_terminated_length": 763.8,
"completions/mean_length": 115.12734375,
"completions/mean_terminated_length": 112.62236328125,
"completions/min_length": 37.8,
"completions/min_terminated_length": 37.8,
"epoch": 0.096,
"grad_norm": 0.0034602871164679527,
"learning_rate": 1e-06,
"loss": 0.0056,
"num_tokens": 101194465.0,
"reward": 0.7778663992881775,
"reward_std": 0.16724584996700287,
"rewards/accuracy_reward": 0.35966796875,
"rewards/brier_reward": 0.6825961947441102,
"rewards/confidence_uniqueness_reward": 0.906465494632721,
"rewards/format_reward": 0.99453125,
"rewards/frontier_aurc_reward": -0.0052437069825828075,
"rewards/frontier_coverage_0": 0.10269922763109207,
"rewards/frontier_coverage_1": 0.10269922763109207,
"rewards/frontier_coverage_10": 0.10269922763109207,
"rewards/frontier_coverage_15": 0.10269922763109207,
"rewards/frontier_coverage_20": 0.10269922763109207,
"rewards/frontier_coverage_25": 0.10269922763109207,
"rewards/frontier_coverage_5": 0.10269922763109207,
"rewards/frontier_ece_reward": -0.03292221836745739,
"rewards/frontier_entropy_batch_reward": -0.6376779556274415,
"signal/accuracy_reward/centered_abs_mean": 0.185467529296875,
"signal/accuracy_reward/group_bin_occupancy": 0.20234375,
"signal/accuracy_reward/group_std_mean": 0.23311618864536285,
"signal/accuracy_reward/group_zero_std_frac": 0.38125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0927337646484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0927337646484375,
"signal/advantage_abs_mean": 0.13114093095064164,
"signal/advantage_pre_scale_abs_mean": 0.13114093095064164,
"signal/advantage_pre_scale_std": 0.1809857577085495,
"signal/advantage_std": 0.1809857577085495,
"signal/brier_reward/centered_abs_mean": 0.22195914387702942,
"signal/brier_reward/group_bin_occupancy": 0.914453125,
"signal/brier_reward/group_std_mean": 0.2706751048564911,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022195914760231972,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.022195914760231972,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05482520312070847,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.744921875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0789007768034935,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005482520535588264,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005482520535588264,
"signal/format_reward/centered_abs_mean": 0.01048583984375,
"signal/format_reward/group_bin_occupancy": 0.144140625,
"signal/format_reward/group_std_mean": 0.02846333533525467,
"signal/format_reward/group_zero_std_frac": 0.846875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005242919921875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005242919921875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026835352182388306,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.751953125,
"signal/frontier_aurc_reward/group_std_mean": 0.004022491350769997,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3544190227985385e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3544190227985385e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20481350123882294,
"signal/frontier_coverage_0/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_0/group_std_mean": 0.2719453454017639,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_1/centered_abs_mean": 0.20481350123882294,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_1/group_std_mean": 0.2719453454017639,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_10/centered_abs_mean": 0.20481350123882294,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_10/group_std_mean": 0.2719453454017639,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_15/centered_abs_mean": 0.20481350123882294,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_15/group_std_mean": 0.2719453454017639,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_20/centered_abs_mean": 0.20481350123882294,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_20/group_std_mean": 0.2719453454017639,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_25/centered_abs_mean": 0.20481350123882294,
"signal/frontier_coverage_25/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_25/group_std_mean": 0.2719453454017639,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_5/centered_abs_mean": 0.20481350123882294,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88515625,
"signal/frontier_coverage_5/group_std_mean": 0.2719453454017639,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025601688772439956,
"signal/frontier_ece_reward/centered_abs_mean": 0.12130876779556274,
"signal/frontier_ece_reward/group_bin_occupancy": 0.83046875,
"signal/frontier_ece_reward/group_std_mean": 0.1663817882537842,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012130877003073692,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012130877003073692,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44812787771224977,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.614453125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5313847541809082,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0448127880692482,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0448127880692482,
"step": 30
},
{
"calibration/aurc": 0.5053299592442537,
"calibration/batch_distribution_entropy": 0.9488818143342655,
"calibration/batch_entropy_100bins": 0.9306714343967982,
"calibration/batch_entropy_10bins": 0.9488818143342655,
"calibration/batch_entropy_50bins": 0.9460526008785772,
"calibration/batch_uniqueness": 0.9490444932260778,
"calibration/buffer_distribution_entropy": 0.8224099484197692,
"calibration/buffer_entropy_100bins": 0.6592172461309497,
"calibration/buffer_entropy_10bins": 0.8224099484197692,
"calibration/buffer_entropy_50bins": 0.7358472349793205,
"calibration/confidence_entropy": 0.5183275438944214,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0027450980392156863,
"calibration/coverage@20%": 0.0027450980392156863,
"calibration/coverage@25%": 0.008627450980392156,
"calibration/coverage@30%": 0.020375273397030044,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.1796229260918823,
"calibration/mean_confidence": 0.40559769162291126,
"calibration/prompt_uniqueness": 0.8886733536752803,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 576.4,
"completions/mean_length": 106.2763671875,
"completions/mean_terminated_length": 104.87900238037109,
"completions/min_length": 36.0,
"completions/min_terminated_length": 36.0,
"epoch": 0.112,
"grad_norm": 0.0038092397153377533,
"learning_rate": 1e-06,
"loss": 0.0021,
"num_tokens": 117392207.0,
"reward": 0.8370797395706177,
"reward_std": 0.13981394171714784,
"rewards/accuracy_reward": 0.39765625,
"rewards/brier_reward": 0.7146290063858032,
"rewards/confidence_uniqueness_reward": 0.9453599929809571,
"rewards/format_reward": 0.99765625,
"rewards/frontier_aurc_reward": -0.004691840149462223,
"rewards/frontier_coverage_0": 0.11487203687429429,
"rewards/frontier_coverage_1": 0.11487203687429429,
"rewards/frontier_coverage_10": 0.11487203687429429,
"rewards/frontier_coverage_15": 0.11487203687429429,
"rewards/frontier_coverage_20": 0.11487203687429429,
"rewards/frontier_coverage_25": 0.11487203687429429,
"rewards/frontier_coverage_5": 0.11487203687429429,
"rewards/frontier_ece_reward": -0.006113046361133456,
"rewards/frontier_entropy_batch_reward": -0.35956743359565735,
"signal/accuracy_reward/centered_abs_mean": 0.1920654296875,
"signal/accuracy_reward/group_bin_occupancy": 0.20625,
"signal/accuracy_reward/group_std_mean": 0.24405551552772523,
"signal/accuracy_reward/group_zero_std_frac": 0.35,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09603271484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09603271484375,
"signal/advantage_abs_mean": 0.10965722203254699,
"signal/advantage_pre_scale_abs_mean": 0.10965722203254699,
"signal/advantage_pre_scale_std": 0.15251348316669464,
"signal/advantage_std": 0.15251348316669464,
"signal/brier_reward/centered_abs_mean": 0.20627183914184571,
"signal/brier_reward/group_bin_occupancy": 0.90703125,
"signal/brier_reward/group_std_mean": 0.25678886771202086,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020627183839678764,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020627183839678764,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026137924194335936,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8671875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03944253027439117,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002613792475312948,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002613792475312948,
"signal/format_reward/centered_abs_mean": 0.00452880859375,
"signal/format_reward/group_bin_occupancy": 0.133984375,
"signal/format_reward/group_std_mean": 0.012921943515539169,
"signal/format_reward/group_zero_std_frac": 0.928125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002264404296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002264404296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016326952259987592,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.719921875,
"signal/frontier_aurc_reward/group_std_mean": 0.0026589396875351667,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0408690397744066e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0408690397744066e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2962383508682251,
"signal/frontier_coverage_0/group_bin_occupancy": 0.94453125,
"signal/frontier_coverage_0/group_std_mean": 0.3698026418685913,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_1/centered_abs_mean": 0.2962383508682251,
"signal/frontier_coverage_1/group_bin_occupancy": 0.94453125,
"signal/frontier_coverage_1/group_std_mean": 0.3698026418685913,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_10/centered_abs_mean": 0.2962383508682251,
"signal/frontier_coverage_10/group_bin_occupancy": 0.94453125,
"signal/frontier_coverage_10/group_std_mean": 0.3698026418685913,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_15/centered_abs_mean": 0.2962383508682251,
"signal/frontier_coverage_15/group_bin_occupancy": 0.94453125,
"signal/frontier_coverage_15/group_std_mean": 0.3698026418685913,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_20/centered_abs_mean": 0.2962383508682251,
"signal/frontier_coverage_20/group_bin_occupancy": 0.94453125,
"signal/frontier_coverage_20/group_std_mean": 0.3698026418685913,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_25/centered_abs_mean": 0.2962383508682251,
"signal/frontier_coverage_25/group_bin_occupancy": 0.94453125,
"signal/frontier_coverage_25/group_std_mean": 0.3698026418685913,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_5/centered_abs_mean": 0.2962383508682251,
"signal/frontier_coverage_5/group_bin_occupancy": 0.94453125,
"signal/frontier_coverage_5/group_std_mean": 0.3698026418685913,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003702979441732168,
"signal/frontier_ece_reward/centered_abs_mean": 0.06853184774518013,
"signal/frontier_ece_reward/group_bin_occupancy": 0.810546875,
"signal/frontier_ece_reward/group_std_mean": 0.10600927323102952,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006853185035288334,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006853185035288334,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.421990305185318,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76328125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.48495404720306395,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04219903200864792,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04219903200864792,
"step": 35
},
{
"calibration/aurc": 0.5568013985695004,
"calibration/batch_distribution_entropy": 0.9200766038929988,
"calibration/batch_entropy_100bins": 0.9329721377257479,
"calibration/batch_entropy_10bins": 0.9200766038929988,
"calibration/batch_entropy_50bins": 0.9367769710746388,
"calibration/batch_uniqueness": 0.942095789057279,
"calibration/buffer_distribution_entropy": 0.8899342638622837,
"calibration/buffer_entropy_100bins": 0.7478820117698411,
"calibration/buffer_entropy_10bins": 0.8899342638622837,
"calibration/buffer_entropy_50bins": 0.8124210230759926,
"calibration/confidence_entropy": 0.4988997779411394,
"calibration/coverage@0%": 0.001175703157975519,
"calibration/coverage@1%": 0.001175703157975519,
"calibration/coverage@10%": 0.004705114922681402,
"calibration/coverage@15%": 0.004705114922681402,
"calibration/coverage@20%": 0.0074502129618970875,
"calibration/coverage@25%": 0.008626683550132382,
"calibration/coverage@30%": 0.018822761981504933,
"calibration/coverage@5%": 0.001175703157975519,
"calibration/ece": 0.18915972842799716,
"calibration/mean_confidence": 0.34738454918178696,
"calibration/prompt_uniqueness": 0.8832753255885797,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00126953125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 759.6,
"completions/mean_length": 107.24765625,
"completions/mean_terminated_length": 105.431201171875,
"completions/min_length": 41.0,
"completions/min_terminated_length": 41.0,
"epoch": 0.128,
"grad_norm": 0.0016961501678451896,
"learning_rate": 1e-06,
"loss": 0.0051,
"num_tokens": 133407095.0,
"reward": 0.8337414741516114,
"reward_std": 0.12419430166482925,
"rewards/accuracy_reward": 0.39267578125,
"rewards/brier_reward": 0.7212756514549256,
"rewards/confidence_uniqueness_reward": 0.9402257800102234,
"rewards/format_reward": 0.99736328125,
"rewards/frontier_aurc_reward": -0.004438658151775599,
"rewards/frontier_coverage_0": 0.12889230251312256,
"rewards/frontier_coverage_1": 0.12889230251312256,
"rewards/frontier_coverage_10": 0.12889230251312256,
"rewards/frontier_coverage_15": 0.12889230251312256,
"rewards/frontier_coverage_20": 0.12889230251312256,
"rewards/frontier_coverage_25": 0.12889230251312256,
"rewards/frontier_coverage_5": 0.12889230251312256,
"rewards/frontier_ece_reward": 0.0015361378580564633,
"rewards/frontier_entropy_batch_reward": -0.3880440592765808,
"signal/accuracy_reward/centered_abs_mean": 0.167462158203125,
"signal/accuracy_reward/group_bin_occupancy": 0.197265625,
"signal/accuracy_reward/group_std_mean": 0.21296925246715545,
"signal/accuracy_reward/group_zero_std_frac": 0.421875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0837310791015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0837310791015625,
"signal/advantage_abs_mean": 0.09632081687450408,
"signal/advantage_pre_scale_abs_mean": 0.09632081687450408,
"signal/advantage_pre_scale_std": 0.13876967430114745,
"signal/advantage_std": 0.13876967430114745,
"signal/brier_reward/centered_abs_mean": 0.19647954106330873,
"signal/brier_reward/group_bin_occupancy": 0.883984375,
"signal/brier_reward/group_std_mean": 0.248234623670578,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019647954031825066,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019647954031825066,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024874152988195418,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.905859375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03841259628534317,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024874153779819606,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024874153779819606,
"signal/format_reward/centered_abs_mean": 0.005108642578125,
"signal/format_reward/group_bin_occupancy": 0.135546875,
"signal/format_reward/group_std_mean": 0.014915533270686865,
"signal/format_reward/group_zero_std_frac": 0.915625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025543212890625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0025543212890625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013979610754176973,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.748046875,
"signal/frontier_aurc_reward/group_std_mean": 0.0022263232618570327,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7474513515480795e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7474513515480795e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.3031778931617737,
"signal/frontier_coverage_0/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_0/group_std_mean": 0.3761015355587006,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_1/centered_abs_mean": 0.3031778931617737,
"signal/frontier_coverage_1/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_1/group_std_mean": 0.3761015355587006,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_10/centered_abs_mean": 0.3031778931617737,
"signal/frontier_coverage_10/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_10/group_std_mean": 0.3761015355587006,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_15/centered_abs_mean": 0.3031778931617737,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_15/group_std_mean": 0.3761015355587006,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_20/centered_abs_mean": 0.3031778931617737,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_20/group_std_mean": 0.3761015355587006,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_25/centered_abs_mean": 0.3031778931617737,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_25/group_std_mean": 0.3761015355587006,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_5/centered_abs_mean": 0.3031778931617737,
"signal/frontier_coverage_5/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_5/group_std_mean": 0.3761015355587006,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037897238973528145,
"signal/frontier_ece_reward/centered_abs_mean": 0.05225505083799362,
"signal/frontier_ece_reward/group_bin_occupancy": 0.816796875,
"signal/frontier_ece_reward/group_std_mean": 0.0818573072552681,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00522550530731678,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00522550530731678,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.420942884683609,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.778515625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.483720475435257,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042094288021326066,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042094288021326066,
"step": 40
},
{
"calibration/aurc": 0.4175940179177261,
"calibration/batch_distribution_entropy": 0.979250693438256,
"calibration/batch_entropy_100bins": 0.9653378707811878,
"calibration/batch_entropy_10bins": 0.979250693438256,
"calibration/batch_entropy_50bins": 0.9767701739682157,
"calibration/batch_uniqueness": 0.9545605405154486,
"calibration/buffer_distribution_entropy": 0.9248228875256566,
"calibration/buffer_entropy_100bins": 0.8047559482027328,
"calibration/buffer_entropy_10bins": 0.9248228875256566,
"calibration/buffer_entropy_50bins": 0.8588074955955491,
"calibration/confidence_entropy": 0.5313858867098616,
"calibration/coverage@0%": 0.0011734038649706458,
"calibration/coverage@1%": 0.0011734038649706458,
"calibration/coverage@10%": 0.012892153864970645,
"calibration/coverage@15%": 0.014454653864970645,
"calibration/coverage@20%": 0.07773590386497065,
"calibration/coverage@25%": 0.20117340386497062,
"calibration/coverage@30%": 0.20430451932485322,
"calibration/coverage@5%": 0.0011734038649706458,
"calibration/ece": 0.24015715380201258,
"calibration/mean_confidence": 0.4763771020182851,
"calibration/prompt_uniqueness": 0.894472107505203,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1201.6,
"completions/max_terminated_length": 711.0,
"completions/mean_length": 107.3626953125,
"completions/mean_terminated_length": 106.80433654785156,
"completions/min_length": 41.8,
"completions/min_terminated_length": 41.8,
"epoch": 0.144,
"grad_norm": 0.002021110150963068,
"learning_rate": 1e-06,
"loss": 0.0015,
"num_tokens": 149456921.0,
"reward": 0.8988601326942444,
"reward_std": 0.13267979323863982,
"rewards/accuracy_reward": 0.50439453125,
"rewards/brier_reward": 0.7083608627319335,
"rewards/confidence_uniqueness_reward": 0.9530706882476807,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.003968859650194645,
"rewards/frontier_coverage_0": 0.02937074126675725,
"rewards/frontier_coverage_1": 0.02937074126675725,
"rewards/frontier_coverage_10": 0.02937074126675725,
"rewards/frontier_coverage_15": 0.02937074126675725,
"rewards/frontier_coverage_20": 0.02937074126675725,
"rewards/frontier_coverage_25": 0.02937074126675725,
"rewards/frontier_coverage_5": 0.02937074126675725,
"rewards/frontier_ece_reward": 0.006849961820989847,
"rewards/frontier_entropy_batch_reward": -0.22197339236736296,
"signal/accuracy_reward/centered_abs_mean": 0.164532470703125,
"signal/accuracy_reward/group_bin_occupancy": 0.202734375,
"signal/accuracy_reward/group_std_mean": 0.21746462881565093,
"signal/accuracy_reward/group_zero_std_frac": 0.378125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0822662353515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0822662353515625,
"signal/advantage_abs_mean": 0.1036305546760559,
"signal/advantage_pre_scale_abs_mean": 0.1036305546760559,
"signal/advantage_pre_scale_std": 0.14524299502372742,
"signal/advantage_std": 0.14524299502372742,
"signal/brier_reward/centered_abs_mean": 0.20842026472091674,
"signal/brier_reward/group_bin_occupancy": 0.92578125,
"signal/brier_reward/group_std_mean": 0.2574777901172638,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02084202691912651,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02084202691912651,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015083288960158824,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.941015625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02174595184624195,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001508328877389431,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001508328877389431,
"signal/format_reward/centered_abs_mean": 0.00189208984375,
"signal/format_reward/group_bin_occupancy": 0.12890625,
"signal/format_reward/group_std_mean": 0.005524271540343762,
"signal/format_reward/group_zero_std_frac": 0.96875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020871605491265656,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.786328125,
"signal/frontier_aurc_reward/group_std_mean": 0.003077511163428426,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6089507082360795e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6089507082360795e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.261399644613266,
"signal/frontier_coverage_0/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_0/group_std_mean": 0.3283530294895172,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_1/centered_abs_mean": 0.261399644613266,
"signal/frontier_coverage_1/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_1/group_std_mean": 0.3283530294895172,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_10/centered_abs_mean": 0.261399644613266,
"signal/frontier_coverage_10/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_10/group_std_mean": 0.3283530294895172,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_15/centered_abs_mean": 0.261399644613266,
"signal/frontier_coverage_15/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_15/group_std_mean": 0.3283530294895172,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_20/centered_abs_mean": 0.261399644613266,
"signal/frontier_coverage_20/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_20/group_std_mean": 0.3283530294895172,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_25/centered_abs_mean": 0.261399644613266,
"signal/frontier_coverage_25/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_25/group_std_mean": 0.3283530294895172,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_5/centered_abs_mean": 0.261399644613266,
"signal/frontier_coverage_5/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_5/group_std_mean": 0.3283530294895172,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032674957066774367,
"signal/frontier_ece_reward/centered_abs_mean": 0.0648654729127884,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8375,
"signal/frontier_ece_reward/group_std_mean": 0.096609228849411,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006486547738313675,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006486547738313675,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3109690427780151,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76015625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38899595737457277,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03109690472483635,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03109690472483635,
"step": 45
},
{
"calibration/aurc": 0.45887670957463256,
"calibration/batch_distribution_entropy": 0.986413046557384,
"calibration/batch_entropy_100bins": 0.9701950864389216,
"calibration/batch_entropy_10bins": 0.986413046557384,
"calibration/batch_entropy_50bins": 0.9786926740327588,
"calibration/batch_uniqueness": 0.9566806171713684,
"calibration/buffer_distribution_entropy": 0.94049635911715,
"calibration/buffer_entropy_100bins": 0.8437552164763602,
"calibration/buffer_entropy_10bins": 0.94049635911715,
"calibration/buffer_entropy_50bins": 0.8891188471793313,
"calibration/confidence_entropy": 0.5183146562190272,
"calibration/coverage@0%": 0.000392156862745098,
"calibration/coverage@1%": 0.000392156862745098,
"calibration/coverage@10%": 0.000392156862745098,
"calibration/coverage@15%": 0.000392156862745098,
"calibration/coverage@20%": 0.005079656862745098,
"calibration/coverage@25%": 0.017189031862745098,
"calibration/coverage@30%": 0.025001531862745098,
"calibration/coverage@5%": 0.000392156862745098,
"calibration/ece": 0.16821106842628572,
"calibration/mean_confidence": 0.5462577650957192,
"calibration/prompt_uniqueness": 0.895401010503382,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1131.4,
"completions/max_terminated_length": 745.4,
"completions/mean_length": 116.3083984375,
"completions/mean_terminated_length": 115.8933090209961,
"completions/min_length": 46.4,
"completions/min_terminated_length": 46.4,
"epoch": 0.16,
"grad_norm": 0.0017517129890620708,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 165668847.0,
"reward": 0.8806891083717346,
"reward_std": 0.1352734684944153,
"rewards/accuracy_reward": 0.45361328125,
"rewards/brier_reward": 0.7112634301185607,
"rewards/confidence_uniqueness_reward": 0.9563660025596619,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0043065791018307206,
"rewards/frontier_coverage_0": 0.06639667674899101,
"rewards/frontier_coverage_1": 0.06639667674899101,
"rewards/frontier_coverage_10": 0.06639667674899101,
"rewards/frontier_coverage_15": 0.06639667674899101,
"rewards/frontier_coverage_20": 0.06639667674899101,
"rewards/frontier_coverage_25": 0.06639667674899101,
"rewards/frontier_coverage_5": 0.06639667674899101,
"rewards/frontier_ece_reward": 0.005921919783577323,
"rewards/frontier_entropy_batch_reward": -0.18935585916042327,
"signal/accuracy_reward/centered_abs_mean": 0.159417724609375,
"signal/accuracy_reward/group_bin_occupancy": 0.1921875,
"signal/accuracy_reward/group_std_mean": 0.20114850401878356,
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0797088623046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0797088623046875,
"signal/advantage_abs_mean": 0.10809851884841919,
"signal/advantage_pre_scale_abs_mean": 0.10809851884841919,
"signal/advantage_pre_scale_std": 0.1511505126953125,
"signal/advantage_std": 0.1511505126953125,
"signal/brier_reward/centered_abs_mean": 0.21221804320812226,
"signal/brier_reward/group_bin_occupancy": 0.920703125,
"signal/brier_reward/group_std_mean": 0.2602735161781311,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122180461883545,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02122180461883545,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01266609001904726,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.947265625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017678024619817732,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012666089925915003,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012666089925915003,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0033145629335194827,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002775211539119482,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.80078125,
"signal/frontier_aurc_reward/group_std_mean": 0.0039797000586986545,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4690145548665895e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4690145548665895e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2263072282075882,
"signal/frontier_coverage_0/group_bin_occupancy": 0.920703125,
"signal/frontier_coverage_0/group_std_mean": 0.2919350802898407,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_1/centered_abs_mean": 0.2263072282075882,
"signal/frontier_coverage_1/group_bin_occupancy": 0.920703125,
"signal/frontier_coverage_1/group_std_mean": 0.2919350802898407,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_10/centered_abs_mean": 0.2263072282075882,
"signal/frontier_coverage_10/group_bin_occupancy": 0.920703125,
"signal/frontier_coverage_10/group_std_mean": 0.2919350802898407,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_15/centered_abs_mean": 0.2263072282075882,
"signal/frontier_coverage_15/group_bin_occupancy": 0.920703125,
"signal/frontier_coverage_15/group_std_mean": 0.2919350802898407,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_20/centered_abs_mean": 0.2263072282075882,
"signal/frontier_coverage_20/group_bin_occupancy": 0.920703125,
"signal/frontier_coverage_20/group_std_mean": 0.2919350802898407,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_25/centered_abs_mean": 0.2263072282075882,
"signal/frontier_coverage_25/group_bin_occupancy": 0.920703125,
"signal/frontier_coverage_25/group_std_mean": 0.2919350802898407,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_5/centered_abs_mean": 0.2263072282075882,
"signal/frontier_coverage_5/group_bin_occupancy": 0.920703125,
"signal/frontier_coverage_5/group_std_mean": 0.2919350802898407,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028288405854254963,
"signal/frontier_ece_reward/centered_abs_mean": 0.06984314173460007,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89296875,
"signal/frontier_ece_reward/group_std_mean": 0.0957074835896492,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006984313949942589,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006984313949942589,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2799877405166626,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.755078125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35995004177093504,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027998774126172066,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027998774126172066,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.6514437234622675,
"eval_calibration/batch_distribution_entropy": 0.9300585720206449,
"eval_calibration/batch_entropy_100bins": 0.6905406002692105,
"eval_calibration/batch_entropy_10bins": 0.9300585720206449,
"eval_calibration/batch_entropy_50bins": 0.7765363996158673,
"eval_calibration/batch_uniqueness": 0.8994140625,
"eval_calibration/buffer_distribution_entropy": 0.9462761211614454,
"eval_calibration/buffer_entropy_100bins": 0.8623138320227769,
"eval_calibration/buffer_entropy_10bins": 0.9462761211614454,
"eval_calibration/buffer_entropy_50bins": 0.9029670207624898,
"eval_calibration/confidence_entropy": 0.5075045392400461,
"eval_calibration/coverage@0%": 0.0078125,
"eval_calibration/coverage@1%": 0.0078125,
"eval_calibration/coverage@10%": 0.0078125,
"eval_calibration/coverage@15%": 0.0078125,
"eval_calibration/coverage@20%": 0.0078125,
"eval_calibration/coverage@25%": 0.0078125,
"eval_calibration/coverage@30%": 0.0078125,
"eval_calibration/coverage@5%": 0.0078125,
"eval_calibration/ece": 0.310032909152462,
"eval_calibration/mean_confidence": 0.5270542874048246,
"eval_calibration/prompt_uniqueness": 0.8994140625,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 294.75,
"eval_completions/max_terminated_length": 294.75,
"eval_completions/mean_length": 126.42483901977539,
"eval_completions/mean_terminated_length": 126.42483901977539,
"eval_completions/min_length": 61.5,
"eval_completions/min_terminated_length": 61.5,
"eval_loss": 0.0,
"eval_num_tokens": 165668847.0,
"eval_reward": 0.7603507339954376,
"eval_reward_std": 0.23446262627840042,
"eval_rewards/accuracy_reward": 0.3828125,
"eval_rewards/brier_reward": 0.6980591118335724,
"eval_rewards/confidence_uniqueness_reward": 0.898681640625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.004815749707631767,
"eval_rewards/frontier_coverage_0": 0.10749666392803192,
"eval_rewards/frontier_coverage_1": 0.10749666392803192,
"eval_rewards/frontier_coverage_10": 0.10749666392803192,
"eval_rewards/frontier_coverage_15": 0.10749666392803192,
"eval_rewards/frontier_coverage_20": 0.10749666392803192,
"eval_rewards/frontier_coverage_25": 0.10749666392803192,
"eval_rewards/frontier_coverage_5": 0.10749666392803192,
"eval_rewards/frontier_ece_reward": -0.0007533840253017843,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 17.0809,
"eval_samples_per_second": 29.272,
"eval_signal/accuracy_reward/centered_abs_mean": 0.45849609375,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.485101580619812,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.229248046875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.229248046875,
"eval_signal/advantage_abs_mean": 0.21446801349520683,
"eval_signal/advantage_pre_scale_abs_mean": 0.21446801349520683,
"eval_signal/advantage_pre_scale_std": 0.23238081485033035,
"eval_signal/advantage_std": 0.23238081485033035,
"eval_signal/brier_reward/centered_abs_mean": 0.23739226162433624,
"eval_signal/brier_reward/group_bin_occupancy": 0.96875,
"eval_signal/brier_reward/group_std_mean": 0.28240957856178284,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02373922662809491,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02373922662809491,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0388031005859375,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.390625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04573572054505348,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038803101051598787,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038803101051598787,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003774499346036464,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.859375,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0055051157251000404,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7181244553939905e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7181244553939905e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3006228432059288,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4013464003801346,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3006228432059288,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4013464003801346,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3006228432059288,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4013464003801346,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3006228432059288,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4013464003801346,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3006228432059288,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4013464003801346,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3006228432059288,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.4013464003801346,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3006228432059288,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4013464003801346,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037577852490358055,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.07121825404465199,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8984375,
"eval_signal/frontier_ece_reward/group_std_mean": 0.10425052046775818,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007121825474314392,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007121825474314392,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.234,
"step": 50
},
{
"calibration/aurc": 0.41083644913153056,
"calibration/batch_distribution_entropy": 0.9935570906004912,
"calibration/batch_entropy_100bins": 0.9715693910526587,
"calibration/batch_entropy_10bins": 0.9935570906004912,
"calibration/batch_entropy_50bins": 0.9855893029338851,
"calibration/batch_uniqueness": 0.9587646484375,
"calibration/buffer_distribution_entropy": 0.9504008284813109,
"calibration/buffer_entropy_100bins": 0.8723124863154184,
"calibration/buffer_entropy_10bins": 0.9504008284813109,
"calibration/buffer_entropy_50bins": 0.9103123756016671,
"calibration/confidence_entropy": 0.4926616876541866,
"calibration/coverage@0%": 0.00078125,
"calibration/coverage@1%": 0.00078125,
"calibration/coverage@10%": 0.00078125,
"calibration/coverage@15%": 0.00078125,
"calibration/coverage@20%": 0.014453125,
"calibration/coverage@25%": 0.027734375,
"calibration/coverage@30%": 0.1140625,
"calibration/coverage@5%": 0.00078125,
"calibration/ece": 0.19613354282701326,
"calibration/mean_confidence": 0.5161718102828323,
"calibration/prompt_uniqueness": 0.89189453125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 873.2,
"completions/max_terminated_length": 439.2,
"completions/mean_length": 132.3017578125,
"completions/mean_terminated_length": 132.02688598632812,
"completions/min_length": 53.8,
"completions/min_terminated_length": 53.8,
"epoch": 0.176,
"grad_norm": 0.0015510269440710545,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 182260737.0,
"reward": 0.8931734323501587,
"reward_std": 0.12140908688306809,
"rewards/accuracy_reward": 0.46357421875,
"rewards/brier_reward": 0.7294286847114563,
"rewards/confidence_uniqueness_reward": 0.9577026724815368,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.003854288347065449,
"rewards/frontier_coverage_0": 0.09462544620037079,
"rewards/frontier_coverage_1": 0.09462544620037079,
"rewards/frontier_coverage_10": 0.09462544620037079,
"rewards/frontier_coverage_15": 0.09462544620037079,
"rewards/frontier_coverage_20": 0.09462544620037079,
"rewards/frontier_coverage_25": 0.09462544620037079,
"rewards/frontier_coverage_5": 0.09462544620037079,
"rewards/frontier_ece_reward": 0.013301673159003258,
"rewards/frontier_entropy_batch_reward": -0.1659554123878479,
"signal/accuracy_reward/centered_abs_mean": 0.140850830078125,
"signal/accuracy_reward/group_bin_occupancy": 0.188671875,
"signal/accuracy_reward/group_std_mean": 0.18363622725009918,
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0704254150390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0704254150390625,
"signal/advantage_abs_mean": 0.09490404278039932,
"signal/advantage_pre_scale_abs_mean": 0.09490404278039932,
"signal/advantage_pre_scale_std": 0.13685409128665924,
"signal/advantage_std": 0.13685409128665924,
"signal/brier_reward/centered_abs_mean": 0.20794688463211058,
"signal/brier_reward/group_bin_occupancy": 0.905078125,
"signal/brier_reward/group_std_mean": 0.25734142661094667,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020794688165187834,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020794688165187834,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011929828859865665,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016922668367624284,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011929828440770506,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011929828440770506,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0033145629335194827,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002570530725643039,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.776953125,
"signal/frontier_aurc_reward/group_std_mean": 0.0038315205834805965,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.213163508917205e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.213163508917205e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.25130972266197205,
"signal/frontier_coverage_0/group_bin_occupancy": 0.9203125,
"signal/frontier_coverage_0/group_std_mean": 0.3175831615924835,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_1/centered_abs_mean": 0.25130972266197205,
"signal/frontier_coverage_1/group_bin_occupancy": 0.9203125,
"signal/frontier_coverage_1/group_std_mean": 0.3175831615924835,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_10/centered_abs_mean": 0.25130972266197205,
"signal/frontier_coverage_10/group_bin_occupancy": 0.9203125,
"signal/frontier_coverage_10/group_std_mean": 0.3175831615924835,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_15/centered_abs_mean": 0.25130972266197205,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9203125,
"signal/frontier_coverage_15/group_std_mean": 0.3175831615924835,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_20/centered_abs_mean": 0.25130972266197205,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9203125,
"signal/frontier_coverage_20/group_std_mean": 0.3175831615924835,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_25/centered_abs_mean": 0.25130972266197205,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9203125,
"signal/frontier_coverage_25/group_std_mean": 0.3175831615924835,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_5/centered_abs_mean": 0.25130972266197205,
"signal/frontier_coverage_5/group_bin_occupancy": 0.9203125,
"signal/frontier_coverage_5/group_std_mean": 0.3175831615924835,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003141371626406908,
"signal/frontier_ece_reward/centered_abs_mean": 0.06303459852933883,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8765625,
"signal/frontier_ece_reward/group_std_mean": 0.08597700744867325,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006303459964692592,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006303459964692592,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2539799213409424,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76171875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33189951777458193,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02539799325168133,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02539799325168133,
"step": 55
},
{
"calibration/aurc": 0.34942118001298506,
"calibration/batch_distribution_entropy": 0.9799110967706934,
"calibration/batch_entropy_100bins": 0.9610832692207,
"calibration/batch_entropy_10bins": 0.9799110967706934,
"calibration/batch_entropy_50bins": 0.9736960127197387,
"calibration/batch_uniqueness": 0.9573197846138358,
"calibration/buffer_distribution_entropy": 0.9584469854664572,
"calibration/buffer_entropy_100bins": 0.8916949290759867,
"calibration/buffer_entropy_10bins": 0.9584469854664572,
"calibration/buffer_entropy_50bins": 0.9246897994577994,
"calibration/confidence_entropy": 0.46040453975152423,
"calibration/coverage@0%": 0.010939028864970645,
"calibration/coverage@1%": 0.010939028864970645,
"calibration/coverage@10%": 0.014454653864970645,
"calibration/coverage@15%": 0.01915132705479452,
"calibration/coverage@20%": 0.10548938967710372,
"calibration/coverage@25%": 0.20474865459882582,
"calibration/coverage@30%": 0.3540063906555773,
"calibration/coverage@5%": 0.010939028864970645,
"calibration/ece": 0.14488413962169583,
"calibration/mean_confidence": 0.4789294839135028,
"calibration/prompt_uniqueness": 0.8839207612513007,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 870.8,
"completions/max_terminated_length": 420.6,
"completions/mean_length": 143.64931640625,
"completions/mean_terminated_length": 143.24107360839844,
"completions/min_length": 60.6,
"completions/min_terminated_length": 60.6,
"epoch": 0.192,
"grad_norm": 0.0014950234908610582,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 198546522.0,
"reward": 0.9052001118659974,
"reward_std": 0.11826727986335754,
"rewards/accuracy_reward": 0.491796875,
"rewards/brier_reward": 0.7406868457794189,
"rewards/confidence_uniqueness_reward": 0.9558995842933655,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0034085330553352833,
"rewards/frontier_coverage_0": 0.09948756024241448,
"rewards/frontier_coverage_1": 0.09948756024241448,
"rewards/frontier_coverage_10": 0.09948756024241448,
"rewards/frontier_coverage_15": 0.09948756024241448,
"rewards/frontier_coverage_20": 0.09948756024241448,
"rewards/frontier_coverage_25": 0.09948756024241448,
"rewards/frontier_coverage_5": 0.09948756024241448,
"rewards/frontier_ece_reward": 0.01857722718268633,
"rewards/frontier_entropy_batch_reward": -0.20535460412502288,
"signal/accuracy_reward/centered_abs_mean": 0.13638916015625,
"signal/accuracy_reward/group_bin_occupancy": 0.18984375,
"signal/accuracy_reward/group_std_mean": 0.18098436594009398,
"signal/accuracy_reward/group_zero_std_frac": 0.48125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.068194580078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.068194580078125,
"signal/advantage_abs_mean": 0.0910866379737854,
"signal/advantage_pre_scale_abs_mean": 0.0910866379737854,
"signal/advantage_pre_scale_std": 0.1333424761891365,
"signal/advantage_std": 0.1333424761891365,
"signal/brier_reward/centered_abs_mean": 0.2078978717327118,
"signal/brier_reward/group_bin_occupancy": 0.876953125,
"signal/brier_reward/group_std_mean": 0.2592237114906311,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020789787545800208,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020789787545800208,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015980724617838858,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.897265625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02260695695877075,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015980724710971117,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015980724710971117,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_bin_occupancy": 0.127734375,
"signal/format_reward/group_std_mean": 0.003866990189999342,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002407692139968276,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.776171875,
"signal/frontier_aurc_reward/group_std_mean": 0.00357803120277822,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.009615102200769e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.009615102200769e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.262815922498703,
"signal/frontier_coverage_0/group_bin_occupancy": 0.908203125,
"signal/frontier_coverage_0/group_std_mean": 0.33183927536010743,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_1/centered_abs_mean": 0.262815922498703,
"signal/frontier_coverage_1/group_bin_occupancy": 0.908203125,
"signal/frontier_coverage_1/group_std_mean": 0.33183927536010743,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_10/centered_abs_mean": 0.262815922498703,
"signal/frontier_coverage_10/group_bin_occupancy": 0.908203125,
"signal/frontier_coverage_10/group_std_mean": 0.33183927536010743,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_15/centered_abs_mean": 0.262815922498703,
"signal/frontier_coverage_15/group_bin_occupancy": 0.908203125,
"signal/frontier_coverage_15/group_std_mean": 0.33183927536010743,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_20/centered_abs_mean": 0.262815922498703,
"signal/frontier_coverage_20/group_bin_occupancy": 0.908203125,
"signal/frontier_coverage_20/group_std_mean": 0.33183927536010743,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_25/centered_abs_mean": 0.262815922498703,
"signal/frontier_coverage_25/group_bin_occupancy": 0.908203125,
"signal/frontier_coverage_25/group_std_mean": 0.33183927536010743,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_5/centered_abs_mean": 0.262815922498703,
"signal/frontier_coverage_5/group_bin_occupancy": 0.908203125,
"signal/frontier_coverage_5/group_std_mean": 0.33183927536010743,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003285199077799916,
"signal/frontier_ece_reward/centered_abs_mean": 0.057891517877578735,
"signal/frontier_ece_reward/group_bin_occupancy": 0.860546875,
"signal/frontier_ece_reward/group_std_mean": 0.07930349558591843,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005789151694625616,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005789151694625616,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3005888402462006,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.749609375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.379769903421402,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030058884248137473,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030058884248137473,
"step": 60
},
{
"calibration/aurc": 0.2947941744415761,
"calibration/batch_distribution_entropy": 0.9785599217065368,
"calibration/batch_entropy_100bins": 0.9581661507133953,
"calibration/batch_entropy_10bins": 0.9785599217065368,
"calibration/batch_entropy_50bins": 0.9720473639947895,
"calibration/batch_uniqueness": 0.9595977783203125,
"calibration/buffer_distribution_entropy": 0.963559167866169,
"calibration/buffer_entropy_100bins": 0.906266340287414,
"calibration/buffer_entropy_10bins": 0.963559167866169,
"calibration/buffer_entropy_50bins": 0.9352872608533884,
"calibration/confidence_entropy": 0.47836497278573387,
"calibration/coverage@0%": 0.012109375,
"calibration/coverage@1%": 0.012109375,
"calibration/coverage@10%": 0.0625,
"calibration/coverage@15%": 0.185546875,
"calibration/coverage@20%": 0.290234375,
"calibration/coverage@25%": 0.455078125,
"calibration/coverage@30%": 0.572265625,
"calibration/coverage@5%": 0.021484375,
"calibration/ece": 0.15920833551919994,
"calibration/mean_confidence": 0.5314152702200673,
"calibration/prompt_uniqueness": 0.88876953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 461.8,
"completions/max_terminated_length": 461.8,
"completions/mean_length": 155.57880859375,
"completions/mean_terminated_length": 155.57880859375,
"completions/min_length": 65.8,
"completions/min_terminated_length": 65.8,
"epoch": 0.208,
"grad_norm": 0.0011343832593411207,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 215171873.0,
"reward": 0.9295921802520752,
"reward_std": 0.11272455304861069,
"rewards/accuracy_reward": 0.5361328125,
"rewards/brier_reward": 0.753898274898529,
"rewards/confidence_uniqueness_reward": 0.960012423992157,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.003036556579172611,
"rewards/frontier_coverage_0": 0.07245685756206513,
"rewards/frontier_coverage_1": 0.07245685756206513,
"rewards/frontier_coverage_10": 0.07245685756206513,
"rewards/frontier_coverage_15": 0.07245685756206513,
"rewards/frontier_coverage_20": 0.07245685756206513,
"rewards/frontier_coverage_25": 0.07245685756206513,
"rewards/frontier_coverage_5": 0.07245685756206513,
"rewards/frontier_ece_reward": 0.022372994944453238,
"rewards/frontier_entropy_batch_reward": -0.18355790972709657,
"signal/accuracy_reward/centered_abs_mean": 0.13197021484375,
"signal/accuracy_reward/group_bin_occupancy": 0.187890625,
"signal/accuracy_reward/group_std_mean": 0.17548914551734923,
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065985107421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.065985107421875,
"signal/advantage_abs_mean": 0.08748974055051803,
"signal/advantage_pre_scale_abs_mean": 0.08748974055051803,
"signal/advantage_pre_scale_std": 0.1294364556670189,
"signal/advantage_std": 0.1294364556670189,
"signal/brier_reward/centered_abs_mean": 0.1890464246273041,
"signal/brier_reward/group_bin_occupancy": 0.86875,
"signal/brier_reward/group_std_mean": 0.23756815493106842,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0189046423882246,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0189046423882246,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012290091067552567,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.928125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015977666527032853,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012290091253817081,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012290091253817081,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024446202907711266,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.78046875,
"signal/frontier_aurc_reward/group_std_mean": 0.003613197011873126,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.055775378015824e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.055775378015824e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.22794330716133118,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_0/group_std_mean": 0.2930518627166748,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_1/centered_abs_mean": 0.22794330716133118,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_1/group_std_mean": 0.2930518627166748,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_10/centered_abs_mean": 0.22794330716133118,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_10/group_std_mean": 0.2930518627166748,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_15/centered_abs_mean": 0.22794330716133118,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_15/group_std_mean": 0.2930518627166748,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_20/centered_abs_mean": 0.22794330716133118,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_20/group_std_mean": 0.2930518627166748,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_25/centered_abs_mean": 0.22794330716133118,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_25/group_std_mean": 0.2930518627166748,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_5/centered_abs_mean": 0.22794330716133118,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_5/group_std_mean": 0.2930518627166748,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00284929140470922,
"signal/frontier_ece_reward/centered_abs_mean": 0.0545510284602642,
"signal/frontier_ece_reward/group_bin_occupancy": 0.84921875,
"signal/frontier_ece_reward/group_std_mean": 0.07529444098472596,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00545510295778513,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00545510295778513,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27159354090690613,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753515625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3504547536373138,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027159354835748672,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027159354835748672,
"step": 65
},
{
"calibration/aurc": 0.3189934672338976,
"calibration/batch_distribution_entropy": 0.9939897166524112,
"calibration/batch_entropy_100bins": 0.967330341286187,
"calibration/batch_entropy_10bins": 0.9939897166524112,
"calibration/batch_entropy_50bins": 0.9831548626039949,
"calibration/batch_uniqueness": 0.9600825121686686,
"calibration/buffer_distribution_entropy": 0.9681576729442384,
"calibration/buffer_entropy_100bins": 0.9180615027527184,
"calibration/buffer_entropy_10bins": 0.9681576729442384,
"calibration/buffer_entropy_50bins": 0.9437593307933965,
"calibration/confidence_entropy": 0.4933469731122976,
"calibration/coverage@0%": 0.00390625,
"calibration/coverage@1%": 0.00390625,
"calibration/coverage@10%": 0.09296875,
"calibration/coverage@15%": 0.2039675245098039,
"calibration/coverage@20%": 0.29543045343137253,
"calibration/coverage@25%": 0.4104151348039215,
"calibration/coverage@30%": 0.5085263480392157,
"calibration/coverage@5%": 0.00390625,
"calibration/ece": 0.15230069879851205,
"calibration/mean_confidence": 0.5016724122037817,
"calibration/prompt_uniqueness": 0.885172063334634,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1095.8,
"completions/max_terminated_length": 681.2,
"completions/mean_length": 163.984765625,
"completions/mean_terminated_length": 163.58394470214844,
"completions/min_length": 66.8,
"completions/min_terminated_length": 66.8,
"epoch": 0.224,
"grad_norm": 0.0011190164368599653,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 232004261.0,
"reward": 0.9104782462120056,
"reward_std": 0.10773791372776031,
"rewards/accuracy_reward": 0.48671875,
"rewards/brier_reward": 0.7708696365356446,
"rewards/confidence_uniqueness_reward": 0.9604137420654297,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.003139969985932112,
"rewards/frontier_coverage_0": 0.11844078451395035,
"rewards/frontier_coverage_1": 0.11844078451395035,
"rewards/frontier_coverage_10": 0.11844078451395035,
"rewards/frontier_coverage_15": 0.11844078451395035,
"rewards/frontier_coverage_20": 0.11844078451395035,
"rewards/frontier_coverage_25": 0.11844078451395035,
"rewards/frontier_coverage_5": 0.11844078451395035,
"rewards/frontier_ece_reward": 0.021617041900753975,
"rewards/frontier_entropy_batch_reward": -0.18300187289714814,
"signal/accuracy_reward/centered_abs_mean": 0.1204345703125,
"signal/accuracy_reward/group_bin_occupancy": 0.182421875,
"signal/accuracy_reward/group_std_mean": 0.15962167084217072,
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06021728515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06021728515625,
"signal/advantage_abs_mean": 0.08397592157125473,
"signal/advantage_pre_scale_abs_mean": 0.08397592157125473,
"signal/advantage_pre_scale_std": 0.1261191889643669,
"signal/advantage_std": 0.1261191889643669,
"signal/brier_reward/centered_abs_mean": 0.17388453483581542,
"signal/brier_reward/group_bin_occupancy": 0.8765625,
"signal/brier_reward/group_std_mean": 0.219650474190712,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01738845370709896,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01738845370709896,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012389418855309486,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9328125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01659379303455353,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012389418901875616,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012389418901875616,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023782884702086447,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7796875,
"signal/frontier_aurc_reward/group_std_mean": 0.003504908038303256,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.972860493173357e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.972860493173357e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.21117229461669923,
"signal/frontier_coverage_0/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_0/group_std_mean": 0.27140182852745054,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_1/centered_abs_mean": 0.21117229461669923,
"signal/frontier_coverage_1/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_1/group_std_mean": 0.27140182852745054,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_10/centered_abs_mean": 0.21117229461669923,
"signal/frontier_coverage_10/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_10/group_std_mean": 0.27140182852745054,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_15/centered_abs_mean": 0.21117229461669923,
"signal/frontier_coverage_15/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_15/group_std_mean": 0.27140182852745054,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_20/centered_abs_mean": 0.21117229461669923,
"signal/frontier_coverage_20/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_20/group_std_mean": 0.27140182852745054,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_25/centered_abs_mean": 0.21117229461669923,
"signal/frontier_coverage_25/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_25/group_std_mean": 0.27140182852745054,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_5/centered_abs_mean": 0.21117229461669923,
"signal/frontier_coverage_5/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_5/group_std_mean": 0.27140182852745054,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026396537199616433,
"signal/frontier_ece_reward/centered_abs_mean": 0.04839524030685425,
"signal/frontier_ece_reward/group_bin_occupancy": 0.842578125,
"signal/frontier_ece_reward/group_std_mean": 0.06754831522703171,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004839524254202842,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004839524254202842,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2688676655292511,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34602165818214414,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026886767894029617,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026886767894029617,
"step": 70
},
{
"calibration/aurc": 0.3722649127966441,
"calibration/batch_distribution_entropy": 0.9747995210345157,
"calibration/batch_entropy_100bins": 0.9571055886452392,
"calibration/batch_entropy_10bins": 0.9747995210345157,
"calibration/batch_entropy_50bins": 0.9714064992183363,
"calibration/batch_uniqueness": 0.9584098953076943,
"calibration/buffer_distribution_entropy": 0.9721121659408224,
"calibration/buffer_entropy_100bins": 0.9276944237884651,
"calibration/buffer_entropy_10bins": 0.9721121659408224,
"calibration/buffer_entropy_50bins": 0.9509444674613132,
"calibration/confidence_entropy": 0.4904281243945256,
"calibration/coverage@0%": 0.000390625,
"calibration/coverage@1%": 0.000390625,
"calibration/coverage@10%": 0.078125,
"calibration/coverage@15%": 0.144921875,
"calibration/coverage@20%": 0.20859375,
"calibration/coverage@25%": 0.257421875,
"calibration/coverage@30%": 0.278515625,
"calibration/coverage@5%": 0.036328125,
"calibration/ece": 0.17131989400604017,
"calibration/mean_confidence": 0.5100639151774843,
"calibration/prompt_uniqueness": 0.8860584270941727,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1245.6,
"completions/max_terminated_length": 700.6,
"completions/mean_length": 166.92529296875,
"completions/mean_terminated_length": 166.2565490722656,
"completions/min_length": 68.4,
"completions/min_terminated_length": 68.4,
"epoch": 0.24,
"grad_norm": 0.0012799223186448216,
"learning_rate": 1e-06,
"loss": 0.0017,
"num_tokens": 248965256.0,
"reward": 0.9279002666473388,
"reward_std": 0.11562621295452118,
"rewards/accuracy_reward": 0.54033203125,
"rewards/brier_reward": 0.752100133895874,
"rewards/confidence_uniqueness_reward": 0.9600031733512878,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.0029632408171892167,
"rewards/frontier_coverage_0": 0.06165201477706432,
"rewards/frontier_coverage_1": 0.06165201477706432,
"rewards/frontier_coverage_10": 0.06165201477706432,
"rewards/frontier_coverage_15": 0.06165201477706432,
"rewards/frontier_coverage_20": 0.06165201477706432,
"rewards/frontier_coverage_25": 0.06165201477706432,
"rewards/frontier_coverage_5": 0.06165201477706432,
"rewards/frontier_ece_reward": 0.018022438511252403,
"rewards/frontier_entropy_batch_reward": -0.2034287005662918,
"signal/accuracy_reward/centered_abs_mean": 0.137420654296875,
"signal/accuracy_reward/group_bin_occupancy": 0.18828125,
"signal/accuracy_reward/group_std_mean": 0.1800607681274414,
"signal/accuracy_reward/group_zero_std_frac": 0.49375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0687103271484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0687103271484375,
"signal/advantage_abs_mean": 0.0900656446814537,
"signal/advantage_pre_scale_abs_mean": 0.0900656446814537,
"signal/advantage_pre_scale_std": 0.13366247713565826,
"signal/advantage_std": 0.13366247713565826,
"signal/brier_reward/centered_abs_mean": 0.17995524406433105,
"signal/brier_reward/group_bin_occupancy": 0.87421875,
"signal/brier_reward/group_std_mean": 0.2263825535774231,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017995523661375044,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017995523661375044,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012892700731754303,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.92578125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018097008019685744,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012892701663076878,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012892701663076878,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0033145629800856113,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002403355622664094,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.784375,
"signal/frontier_aurc_reward/group_std_mean": 0.0035244593862444164,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0041945865377784e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0041945865377784e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.21193841695785523,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8953125,
"signal/frontier_coverage_0/group_std_mean": 0.2735629081726074,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_1/centered_abs_mean": 0.21193841695785523,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8953125,
"signal/frontier_coverage_1/group_std_mean": 0.2735629081726074,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_10/centered_abs_mean": 0.21193841695785523,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8953125,
"signal/frontier_coverage_10/group_std_mean": 0.2735629081726074,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_15/centered_abs_mean": 0.21193841695785523,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8953125,
"signal/frontier_coverage_15/group_std_mean": 0.2735629081726074,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_20/centered_abs_mean": 0.21193841695785523,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8953125,
"signal/frontier_coverage_20/group_std_mean": 0.2735629081726074,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_25/centered_abs_mean": 0.21193841695785523,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8953125,
"signal/frontier_coverage_25/group_std_mean": 0.2735629081726074,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_5/centered_abs_mean": 0.21193841695785523,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8953125,
"signal/frontier_coverage_5/group_std_mean": 0.2735629081726074,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026492302305996416,
"signal/frontier_ece_reward/centered_abs_mean": 0.04577092379331589,
"signal/frontier_ece_reward/group_bin_occupancy": 0.835546875,
"signal/frontier_ece_reward/group_std_mean": 0.06537232622504234,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004577092453837394,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004577092453837394,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28712775707244875,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3657856583595276,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02871277555823326,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02871277555823326,
"step": 75
},
{
"calibration/aurc": 0.2756884567471795,
"calibration/batch_distribution_entropy": 0.981849015365291,
"calibration/batch_entropy_100bins": 0.963794621717061,
"calibration/batch_entropy_10bins": 0.981849015365291,
"calibration/batch_entropy_50bins": 0.9772544795865941,
"calibration/batch_uniqueness": 0.9583403270974067,
"calibration/buffer_distribution_entropy": 0.9748055844875566,
"calibration/buffer_entropy_100bins": 0.9354970264003273,
"calibration/buffer_entropy_10bins": 0.9748055844875566,
"calibration/buffer_entropy_50bins": 0.956262724283142,
"calibration/confidence_entropy": 0.47419515340800844,
"calibration/coverage@0%": 0.019922639432485324,
"calibration/coverage@1%": 0.019922639432485324,
"calibration/coverage@10%": 0.1671882644324853,
"calibration/coverage@15%": 0.29453660102739726,
"calibration/coverage@20%": 0.3945366010273973,
"calibration/coverage@25%": 0.46133347602739727,
"calibration/coverage@30%": 0.6404713490704501,
"calibration/coverage@5%": 0.08242263943248532,
"calibration/ece": 0.1516829189459081,
"calibration/mean_confidence": 0.4916690845191692,
"calibration/prompt_uniqueness": 0.8781654445239333,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 878.2,
"completions/max_terminated_length": 482.8,
"completions/mean_length": 163.8306640625,
"completions/mean_terminated_length": 163.4287567138672,
"completions/min_length": 66.0,
"completions/min_terminated_length": 66.0,
"epoch": 0.256,
"grad_norm": 0.001148949726484716,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 265697698.0,
"reward": 0.9235002279281617,
"reward_std": 0.1053592398762703,
"rewards/accuracy_reward": 0.5205078125,
"rewards/brier_reward": 0.7646861553192139,
"rewards/confidence_uniqueness_reward": 0.9591035127639771,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0028343759011477234,
"rewards/frontier_coverage_0": 0.09306152537465096,
"rewards/frontier_coverage_1": 0.09306152537465096,
"rewards/frontier_coverage_10": 0.09306152537465096,
"rewards/frontier_coverage_15": 0.09306152537465096,
"rewards/frontier_coverage_20": 0.09306152537465096,
"rewards/frontier_coverage_25": 0.09306152537465096,
"rewards/frontier_coverage_5": 0.09306152537465096,
"rewards/frontier_ece_reward": 0.018909335136413574,
"rewards/frontier_entropy_batch_reward": -0.18935712277889252,
"signal/accuracy_reward/centered_abs_mean": 0.12547607421875,
"signal/accuracy_reward/group_bin_occupancy": 0.180078125,
"signal/accuracy_reward/group_std_mean": 0.16060097515583038,
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062738037109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062738037109375,
"signal/advantage_abs_mean": 0.08297341018915176,
"signal/advantage_pre_scale_abs_mean": 0.08297341018915176,
"signal/advantage_pre_scale_std": 0.12462374716997146,
"signal/advantage_std": 0.12462374716997146,
"signal/brier_reward/centered_abs_mean": 0.1717162013053894,
"signal/brier_reward/group_bin_occupancy": 0.865625,
"signal/brier_reward/group_std_mean": 0.21583383977413179,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017171620205044747,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017171620205044747,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012881658598780632,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.929296875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017492034845054148,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012881658738479018,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012881658738479018,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022053365129977463,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.780078125,
"signal/frontier_aurc_reward/group_std_mean": 0.00320956208743155,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7566706557990982e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7566706557990982e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.21591795980930328,
"signal/frontier_coverage_0/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_0/group_std_mean": 0.2742127299308777,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_1/centered_abs_mean": 0.21591795980930328,
"signal/frontier_coverage_1/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_1/group_std_mean": 0.2742127299308777,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_10/centered_abs_mean": 0.21591795980930328,
"signal/frontier_coverage_10/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_10/group_std_mean": 0.2742127299308777,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_15/centered_abs_mean": 0.21591795980930328,
"signal/frontier_coverage_15/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_15/group_std_mean": 0.2742127299308777,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_20/centered_abs_mean": 0.21591795980930328,
"signal/frontier_coverage_20/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_20/group_std_mean": 0.2742127299308777,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_25/centered_abs_mean": 0.21591795980930328,
"signal/frontier_coverage_25/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_25/group_std_mean": 0.2742127299308777,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_5/centered_abs_mean": 0.21591795980930328,
"signal/frontier_coverage_5/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_5/group_std_mean": 0.2742127299308777,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026989746373146774,
"signal/frontier_ece_reward/centered_abs_mean": 0.04098983183503151,
"signal/frontier_ece_reward/group_bin_occupancy": 0.82890625,
"signal/frontier_ece_reward/group_std_mean": 0.05865926668047905,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004098983202129603,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004098983202129603,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27483277320861815,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35315130949020385,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027483277022838593,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027483277022838593,
"step": 80
},
{
"calibration/aurc": 0.3897235445216074,
"calibration/batch_distribution_entropy": 0.9871985803996584,
"calibration/batch_entropy_100bins": 0.9650498108188612,
"calibration/batch_entropy_10bins": 0.9871985803996584,
"calibration/batch_entropy_50bins": 0.9774071673496001,
"calibration/batch_uniqueness": 0.9601234608508132,
"calibration/buffer_distribution_entropy": 0.9776273835298188,
"calibration/buffer_entropy_100bins": 0.9421174103566827,
"calibration/buffer_entropy_10bins": 0.9776273835298188,
"calibration/buffer_entropy_50bins": 0.9610857397243047,
"calibration/confidence_entropy": 0.4835290106060966,
"calibration/coverage@0%": 0.0035194471624266145,
"calibration/coverage@1%": 0.0035194471624266145,
"calibration/coverage@10%": 0.05117569716242661,
"calibration/coverage@15%": 0.1171913221624266,
"calibration/coverage@20%": 0.15980461105675148,
"calibration/coverage@25%": 0.2211900684931507,
"calibration/coverage@30%": 0.3173709637964775,
"calibration/coverage@5%": 0.0035194471624266145,
"calibration/ece": 0.14380001971983383,
"calibration/mean_confidence": 0.5027473010005465,
"calibration/prompt_uniqueness": 0.880182234082336,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1118.0,
"completions/max_terminated_length": 649.4,
"completions/mean_length": 167.30830078125,
"completions/mean_terminated_length": 166.77369995117186,
"completions/min_length": 64.8,
"completions/min_terminated_length": 64.8,
"epoch": 0.272,
"grad_norm": 0.0011026699794456363,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 282376631.0,
"reward": 0.9143985748291016,
"reward_std": 0.10913633704185485,
"rewards/accuracy_reward": 0.503125,
"rewards/brier_reward": 0.7558487296104431,
"rewards/confidence_uniqueness_reward": 0.958933699131012,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.003067450597882271,
"rewards/frontier_coverage_0": 0.09763064086437226,
"rewards/frontier_coverage_1": 0.09763064086437226,
"rewards/frontier_coverage_10": 0.09763064086437226,
"rewards/frontier_coverage_15": 0.09763064086437226,
"rewards/frontier_coverage_20": 0.09763064086437226,
"rewards/frontier_coverage_25": 0.09763064086437226,
"rewards/frontier_coverage_5": 0.09763064086437226,
"rewards/frontier_ece_reward": 0.016011307016015053,
"rewards/frontier_entropy_batch_reward": -0.18454676866531372,
"signal/accuracy_reward/centered_abs_mean": 0.1283447265625,
"signal/accuracy_reward/group_bin_occupancy": 0.18359375,
"signal/accuracy_reward/group_std_mean": 0.1672771155834198,
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06417236328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06417236328125,
"signal/advantage_abs_mean": 0.08471592962741852,
"signal/advantage_pre_scale_abs_mean": 0.08471592962741852,
"signal/advantage_pre_scale_std": 0.12727494090795516,
"signal/advantage_std": 0.12727494090795516,
"signal/brier_reward/centered_abs_mean": 0.1756508618593216,
"signal/brier_reward/group_bin_occupancy": 0.872265625,
"signal/brier_reward/group_std_mean": 0.2209733545780182,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017565086483955383,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.017565086483955383,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013165917806327343,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018373236805200577,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013165918411687017,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013165918411687017,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0033145629335194827,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002310941834002733,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.778125,
"signal/frontier_aurc_reward/group_std_mean": 0.0034583484288305045,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8886771542602218e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8886771542602218e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.21623624563217164,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_0/group_std_mean": 0.2773744761943817,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_1/centered_abs_mean": 0.21623624563217164,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_1/group_std_mean": 0.2773744761943817,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_10/centered_abs_mean": 0.21623624563217164,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_10/group_std_mean": 0.2773744761943817,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_15/centered_abs_mean": 0.21623624563217164,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_15/group_std_mean": 0.2773744761943817,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_20/centered_abs_mean": 0.21623624563217164,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_20/group_std_mean": 0.2773744761943817,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_25/centered_abs_mean": 0.21623624563217164,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_25/group_std_mean": 0.2773744761943817,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_5/centered_abs_mean": 0.21623624563217164,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_5/group_std_mean": 0.2773744761943817,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002702953014522791,
"signal/frontier_ece_reward/centered_abs_mean": 0.04007608145475387,
"signal/frontier_ece_reward/group_bin_occupancy": 0.824609375,
"signal/frontier_ece_reward/group_std_mean": 0.05737483724951744,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004007608164101839,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004007608164101839,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26959097683429717,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748828125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3461661696434021,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026959098130464553,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026959098130464553,
"step": 85
},
{
"calibration/aurc": 0.33929897089230937,
"calibration/batch_distribution_entropy": 0.989794172784116,
"calibration/batch_entropy_100bins": 0.9616161331101599,
"calibration/batch_entropy_10bins": 0.989794172784116,
"calibration/batch_entropy_50bins": 0.9782942991565935,
"calibration/batch_uniqueness": 0.9621734619140625,
"calibration/buffer_distribution_entropy": 0.9798353797281024,
"calibration/buffer_entropy_100bins": 0.9472962073477508,
"calibration/buffer_entropy_10bins": 0.9798353797281024,
"calibration/buffer_entropy_50bins": 0.9648110714996019,
"calibration/confidence_entropy": 0.5029849704441383,
"calibration/coverage@0%": 0.016796875,
"calibration/coverage@1%": 0.016796875,
"calibration/coverage@10%": 0.06796875,
"calibration/coverage@15%": 0.10859375,
"calibration/coverage@20%": 0.158203125,
"calibration/coverage@25%": 0.209375,
"calibration/coverage@30%": 0.330078125,
"calibration/coverage@5%": 0.026171875,
"calibration/ece": 0.13708101797225564,
"calibration/mean_confidence": 0.5260060638175437,
"calibration/prompt_uniqueness": 0.8890625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 990.4,
"completions/max_terminated_length": 636.2,
"completions/mean_length": 157.38349609375,
"completions/mean_terminated_length": 157.11478271484376,
"completions/min_length": 65.2,
"completions/min_terminated_length": 65.2,
"epoch": 0.288,
"grad_norm": 0.0010967063717544079,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 298946414.0,
"reward": 0.9226956605911255,
"reward_std": 0.10692842602729798,
"rewards/accuracy_reward": 0.5185546875,
"rewards/brier_reward": 0.759922206401825,
"rewards/confidence_uniqueness_reward": 0.9619071364402771,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0029738360550254582,
"rewards/frontier_coverage_0": 0.08782123178243637,
"rewards/frontier_coverage_1": 0.08782123178243637,
"rewards/frontier_coverage_10": 0.08782123178243637,
"rewards/frontier_coverage_15": 0.08782123178243637,
"rewards/frontier_coverage_20": 0.08782123178243637,
"rewards/frontier_coverage_25": 0.08782123178243637,
"rewards/frontier_coverage_5": 0.08782123178243637,
"rewards/frontier_ece_reward": 0.015628389501944184,
"rewards/frontier_entropy_batch_reward": -0.17876963317394257,
"signal/accuracy_reward/centered_abs_mean": 0.12471923828125,
"signal/accuracy_reward/group_bin_occupancy": 0.18671875,
"signal/accuracy_reward/group_std_mean": 0.16823607087135314,
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062359619140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062359619140625,
"signal/advantage_abs_mean": 0.08173956871032714,
"signal/advantage_pre_scale_abs_mean": 0.08173956871032714,
"signal/advantage_pre_scale_std": 0.12362392991781235,
"signal/advantage_std": 0.12362392991781235,
"signal/brier_reward/centered_abs_mean": 0.16985757648944855,
"signal/brier_reward/group_bin_occupancy": 0.8671875,
"signal/brier_reward/group_std_mean": 0.21388141214847564,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016985757648944853,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016985757648944853,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011985784396529198,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.937890625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015646530874073507,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011985784396529198,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011985784396529198,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022517605219036342,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.753125,
"signal/frontier_aurc_reward/group_std_mean": 0.00341446828097105,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8147007833467795e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8147007833467795e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20966576039791107,
"signal/frontier_coverage_0/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_0/group_std_mean": 0.2668231546878815,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_1/centered_abs_mean": 0.20966576039791107,
"signal/frontier_coverage_1/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_1/group_std_mean": 0.2668231546878815,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_10/centered_abs_mean": 0.20966576039791107,
"signal/frontier_coverage_10/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_10/group_std_mean": 0.2668231546878815,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_15/centered_abs_mean": 0.20966576039791107,
"signal/frontier_coverage_15/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_15/group_std_mean": 0.2668231546878815,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_20/centered_abs_mean": 0.20966576039791107,
"signal/frontier_coverage_20/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_20/group_std_mean": 0.2668231546878815,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_25/centered_abs_mean": 0.20966576039791107,
"signal/frontier_coverage_25/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_25/group_std_mean": 0.2668231546878815,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_5/centered_abs_mean": 0.20966576039791107,
"signal/frontier_coverage_5/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_5/group_std_mean": 0.2668231546878815,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002620822051540017,
"signal/frontier_ece_reward/centered_abs_mean": 0.03799701854586601,
"signal/frontier_ece_reward/group_bin_occupancy": 0.808203125,
"signal/frontier_ece_reward/group_std_mean": 0.05444479286670685,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003799702040851116,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003799702040851116,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26266041994094846,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3405035316944122,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02626604326069355,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02626604326069355,
"step": 90
},
{
"calibration/aurc": 0.31348549126500747,
"calibration/batch_distribution_entropy": 0.984274980481597,
"calibration/batch_entropy_100bins": 0.9631625065111967,
"calibration/batch_entropy_10bins": 0.984274980481597,
"calibration/batch_entropy_50bins": 0.9770299989819294,
"calibration/batch_uniqueness": 0.9626984046368829,
"calibration/buffer_distribution_entropy": 0.9815639661047172,
"calibration/buffer_entropy_100bins": 0.9515858500656005,
"calibration/buffer_entropy_10bins": 0.9815639661047172,
"calibration/buffer_entropy_50bins": 0.967851841375308,
"calibration/confidence_entropy": 0.5032728046019687,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.014453125,
"calibration/coverage@15%": 0.11645899584148726,
"calibration/coverage@20%": 0.2305727128180039,
"calibration/coverage@25%": 0.3513194104696673,
"calibration/coverage@30%": 0.4826749021526419,
"calibration/coverage@5%": 0.009765625,
"calibration/ece": 0.12730379469726888,
"calibration/mean_confidence": 0.5350013606511557,
"calibration/prompt_uniqueness": 0.8859659534339229,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 721.4,
"completions/max_terminated_length": 502.4,
"completions/mean_length": 158.0119140625,
"completions/mean_terminated_length": 157.87767639160157,
"completions/min_length": 63.0,
"completions/min_terminated_length": 63.0,
"epoch": 0.304,
"grad_norm": 0.0013374168192967772,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 315494408.0,
"reward": 0.920866048336029,
"reward_std": 0.10256336778402328,
"rewards/accuracy_reward": 0.51435546875,
"rewards/brier_reward": 0.7553410172462464,
"rewards/confidence_uniqueness_reward": 0.9617467761039734,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003019801015034318,
"rewards/frontier_coverage_0": 0.08569234870374202,
"rewards/frontier_coverage_1": 0.08569234870374202,
"rewards/frontier_coverage_10": 0.08569234870374202,
"rewards/frontier_coverage_15": 0.08569234870374202,
"rewards/frontier_coverage_20": 0.08569234870374202,
"rewards/frontier_coverage_25": 0.08569234870374202,
"rewards/frontier_coverage_5": 0.08569234870374202,
"rewards/frontier_ece_reward": 0.014303101412951946,
"rewards/frontier_entropy_batch_reward": -0.16813477575778962,
"signal/accuracy_reward/centered_abs_mean": 0.118218994140625,
"signal/accuracy_reward/group_bin_occupancy": 0.181640625,
"signal/accuracy_reward/group_std_mean": 0.15723580718040467,
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0591094970703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0591094970703125,
"signal/advantage_abs_mean": 0.07947536259889602,
"signal/advantage_pre_scale_abs_mean": 0.07947536259889602,
"signal/advantage_pre_scale_std": 0.1194717451930046,
"signal/advantage_std": 0.1194717451930046,
"signal/brier_reward/centered_abs_mean": 0.16404346823692323,
"signal/brier_reward/group_bin_occupancy": 0.878515625,
"signal/brier_reward/group_std_mean": 0.20719643235206603,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016404346562922,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016404346562922,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012253463082015515,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93359375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01594906710088253,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001225346396677196,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001225346396677196,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021592382341623305,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.76875,
"signal/frontier_aurc_reward/group_std_mean": 0.0032209414057433605,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.699047727219295e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.699047727219295e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2054966926574707,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_0/group_std_mean": 0.2626974046230316,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_1/centered_abs_mean": 0.2054966926574707,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_1/group_std_mean": 0.2626974046230316,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_10/centered_abs_mean": 0.2054966926574707,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_10/group_std_mean": 0.2626974046230316,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_15/centered_abs_mean": 0.2054966926574707,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_15/group_std_mean": 0.2626974046230316,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_20/centered_abs_mean": 0.2054966926574707,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_20/group_std_mean": 0.2626974046230316,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_25/centered_abs_mean": 0.2054966926574707,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_25/group_std_mean": 0.2626974046230316,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_5/centered_abs_mean": 0.2054966926574707,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_5/group_std_mean": 0.2626974046230316,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025687087327241898,
"signal/frontier_ece_reward/centered_abs_mean": 0.03668390363454819,
"signal/frontier_ece_reward/group_bin_occupancy": 0.805078125,
"signal/frontier_ece_reward/group_std_mean": 0.05322126373648643,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003668390540406108,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003668390540406108,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25058538317680357,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3319805324077606,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025058538839221,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025058538839221,
"step": 95
},
{
"calibration/aurc": 0.2693574848107298,
"calibration/batch_distribution_entropy": 0.9883345901493682,
"calibration/batch_entropy_100bins": 0.964873857679726,
"calibration/batch_entropy_10bins": 0.9883345901493682,
"calibration/batch_entropy_50bins": 0.9762615204324228,
"calibration/batch_uniqueness": 0.962933349609375,
"calibration/buffer_distribution_entropy": 0.9829313733544339,
"calibration/buffer_entropy_100bins": 0.9553256663761157,
"calibration/buffer_entropy_10bins": 0.9829313733544339,
"calibration/buffer_entropy_50bins": 0.9704623223411284,
"calibration/confidence_entropy": 0.5033581533240031,
"calibration/coverage@0%": 0.008984375,
"calibration/coverage@1%": 0.008984375,
"calibration/coverage@10%": 0.198046875,
"calibration/coverage@15%": 0.321875,
"calibration/coverage@20%": 0.42265625,
"calibration/coverage@25%": 0.519140625,
"calibration/coverage@30%": 0.587890625,
"calibration/coverage@5%": 0.051171875,
"calibration/ece": 0.1460491076202734,
"calibration/mean_confidence": 0.536501651706636,
"calibration/prompt_uniqueness": 0.88388671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 505.2,
"completions/max_terminated_length": 505.2,
"completions/mean_length": 159.97177734375,
"completions/mean_terminated_length": 159.97177734375,
"completions/min_length": 73.0,
"completions/min_terminated_length": 73.0,
"epoch": 0.32,
"grad_norm": 0.001107752905227244,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 332221223.0,
"reward": 0.9314111828804016,
"reward_std": 0.09003743529319763,
"rewards/accuracy_reward": 0.53388671875,
"rewards/brier_reward": 0.7707650423049927,
"rewards/confidence_uniqueness_reward": 0.9618469119071961,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002678099344484508,
"rewards/frontier_coverage_0": 0.08919677138328552,
"rewards/frontier_coverage_1": 0.08919677138328552,
"rewards/frontier_coverage_10": 0.08919677138328552,
"rewards/frontier_coverage_15": 0.08919677138328552,
"rewards/frontier_coverage_20": 0.08919677138328552,
"rewards/frontier_coverage_25": 0.08919677138328552,
"rewards/frontier_coverage_5": 0.08919677138328552,
"rewards/frontier_ece_reward": 0.017109639570116998,
"rewards/frontier_entropy_batch_reward": -0.18177941143512727,
"signal/accuracy_reward/centered_abs_mean": 0.088275146484375,
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
"signal/accuracy_reward/group_std_mean": 0.12341197431087494,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441375732421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0441375732421875,
"signal/advantage_abs_mean": 0.06831415593624116,
"signal/advantage_pre_scale_abs_mean": 0.06831415593624116,
"signal/advantage_pre_scale_std": 0.10720582604408264,
"signal/advantage_std": 0.10720582604408264,
"signal/brier_reward/centered_abs_mean": 0.1551128536462784,
"signal/brier_reward/group_bin_occupancy": 0.8765625,
"signal/brier_reward/group_std_mean": 0.19683083295822143,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01551128625869751,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01551128625869751,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011729908920824528,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.930859375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015355130471289159,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011729909107089042,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011729909107089042,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002058024751022458,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.765234375,
"signal/frontier_aurc_reward/group_std_mean": 0.0031040641479194164,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5725309751578606e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5725309751578606e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18719760775566102,
"signal/frontier_coverage_0/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_0/group_std_mean": 0.2401178687810898,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_1/centered_abs_mean": 0.18719760775566102,
"signal/frontier_coverage_1/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_1/group_std_mean": 0.2401178687810898,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_10/centered_abs_mean": 0.18719760775566102,
"signal/frontier_coverage_10/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_10/group_std_mean": 0.2401178687810898,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_15/centered_abs_mean": 0.18719760775566102,
"signal/frontier_coverage_15/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_15/group_std_mean": 0.2401178687810898,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_20/centered_abs_mean": 0.18719760775566102,
"signal/frontier_coverage_20/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_20/group_std_mean": 0.2401178687810898,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_25/centered_abs_mean": 0.18719760775566102,
"signal/frontier_coverage_25/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_25/group_std_mean": 0.2401178687810898,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_5/centered_abs_mean": 0.18719760775566102,
"signal/frontier_coverage_5/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_5/group_std_mean": 0.2401178687810898,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023399701341986654,
"signal/frontier_ece_reward/centered_abs_mean": 0.036752212792634964,
"signal/frontier_ece_reward/group_bin_occupancy": 0.809375,
"signal/frontier_ece_reward/group_std_mean": 0.05313318446278572,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003675221325829625,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003675221325829625,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2627487242221832,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3402763903141022,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026274873316287993,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026274873316287993,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.5221447170861099,
"eval_calibration/batch_distribution_entropy": 0.9459823530950395,
"eval_calibration/batch_entropy_100bins": 0.7099314297963414,
"eval_calibration/batch_entropy_10bins": 0.9459823530950395,
"eval_calibration/batch_entropy_50bins": 0.80145287432243,
"eval_calibration/batch_uniqueness": 0.9072265625,
"eval_calibration/buffer_distribution_entropy": 0.9836311818150062,
"eval_calibration/buffer_entropy_100bins": 0.9572832371945064,
"eval_calibration/buffer_entropy_10bins": 0.9836311818150062,
"eval_calibration/buffer_entropy_50bins": 0.9718177911308372,
"eval_calibration/confidence_entropy": 0.48188092462291277,
"eval_calibration/coverage@0%": 0.0234375,
"eval_calibration/coverage@1%": 0.0234375,
"eval_calibration/coverage@10%": 0.0234375,
"eval_calibration/coverage@15%": 0.0234375,
"eval_calibration/coverage@20%": 0.0234375,
"eval_calibration/coverage@25%": 0.0234375,
"eval_calibration/coverage@30%": 0.078125,
"eval_calibration/coverage@5%": 0.0234375,
"eval_calibration/ece": 0.21960188052234064,
"eval_calibration/mean_confidence": 0.4858527777314262,
"eval_calibration/prompt_uniqueness": 0.9072265625,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 343.5,
"eval_completions/max_terminated_length": 343.5,
"eval_completions/mean_length": 161.39608001708984,
"eval_completions/mean_terminated_length": 161.39608001708984,
"eval_completions/min_length": 88.0,
"eval_completions/min_terminated_length": 88.0,
"eval_loss": 0.0,
"eval_num_tokens": 332221223.0,
"eval_reward": 0.7855877131223679,
"eval_reward_std": 0.2253391109406948,
"eval_rewards/accuracy_reward": 0.40234375,
"eval_rewards/brier_reward": 0.767953634262085,
"eval_rewards/confidence_uniqueness_reward": 0.904541015625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.003360171685926616,
"eval_rewards/frontier_coverage_0": 0.17842230759561062,
"eval_rewards/frontier_coverage_1": 0.17842230759561062,
"eval_rewards/frontier_coverage_10": 0.17842230759561062,
"eval_rewards/frontier_coverage_15": 0.17842230759561062,
"eval_rewards/frontier_coverage_20": 0.17842230759561062,
"eval_rewards/frontier_coverage_25": 0.17842230759561062,
"eval_rewards/frontier_coverage_5": 0.17842230759561062,
"eval_rewards/frontier_ece_reward": 0.015964159043505788,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 18.266,
"eval_samples_per_second": 27.373,
"eval_signal/accuracy_reward/centered_abs_mean": 0.46875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.491495244204998,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.234375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.234375,
"eval_signal/advantage_abs_mean": 0.20828185975551605,
"eval_signal/advantage_pre_scale_abs_mean": 0.20828185975551605,
"eval_signal/advantage_pre_scale_std": 0.2229425571858883,
"eval_signal/advantage_std": 0.2229425571858883,
"eval_signal/brier_reward/centered_abs_mean": 0.21178840100765228,
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
"eval_signal/brier_reward/group_std_mean": 0.26099943369627,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021178840193897486,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021178840193897486,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037750244140625,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3671875,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04435160104185343,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037750244955532253,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037750244955532253,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00327087048208341,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8046875,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0050255340756848454,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.088588320882991e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.088588320882991e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.36353210359811783,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4482342004776001,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36353210359811783,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4482342004776001,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.36353210359811783,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4482342004776001,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.36353210359811783,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4482342004776001,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.36353210359811783,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4482342004776001,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.36353210359811783,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_25/group_std_mean": 0.4482342004776001,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36353210359811783,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4482342004776001,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004544151364825666,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.037730203941464424,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.765625,
"eval_signal/frontier_ece_reward/group_std_mean": 0.061389719136059284,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037730205804109573,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037730205804109573,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.219,
"step": 100
},
{
"calibration/aurc": 0.30565225146740005,
"calibration/batch_distribution_entropy": 0.9817950038846648,
"calibration/batch_entropy_100bins": 0.9574620453276896,
"calibration/batch_entropy_10bins": 0.9817950038846648,
"calibration/batch_entropy_50bins": 0.9759610198636606,
"calibration/batch_uniqueness": 0.9610809326171875,
"calibration/buffer_distribution_entropy": 0.9858778348612034,
"calibration/buffer_entropy_100bins": 0.9610770923601644,
"calibration/buffer_entropy_10bins": 0.9858778348612034,
"calibration/buffer_entropy_50bins": 0.9746952731382882,
"calibration/confidence_entropy": 0.4912567329274273,
"calibration/coverage@0%": 0.001953125,
"calibration/coverage@1%": 0.001953125,
"calibration/coverage@10%": 0.001953125,
"calibration/coverage@15%": 0.06328125,
"calibration/coverage@20%": 0.15,
"calibration/coverage@25%": 0.326171875,
"calibration/coverage@30%": 0.60625,
"calibration/coverage@5%": 0.001953125,
"calibration/ece": 0.13340430237380113,
"calibration/mean_confidence": 0.4750956146068159,
"calibration/prompt_uniqueness": 0.87880859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 502.0,
"completions/max_terminated_length": 502.0,
"completions/mean_length": 161.48173828125,
"completions/mean_terminated_length": 161.48173828125,
"completions/min_length": 64.4,
"completions/min_terminated_length": 64.4,
"epoch": 0.336,
"grad_norm": 0.0009220660431310534,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 348597228.0,
"reward": 0.9312142729759216,
"reward_std": 0.09383742660284042,
"rewards/accuracy_reward": 0.5375,
"rewards/brier_reward": 0.7658124089241027,
"rewards/confidence_uniqueness_reward": 0.9620379209518433,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002564789913594723,
"rewards/frontier_coverage_0": 0.086562230437994,
"rewards/frontier_coverage_1": 0.086562230437994,
"rewards/frontier_coverage_10": 0.086562230437994,
"rewards/frontier_coverage_15": 0.086562230437994,
"rewards/frontier_coverage_20": 0.086562230437994,
"rewards/frontier_coverage_25": 0.086562230437994,
"rewards/frontier_coverage_5": 0.086562230437994,
"rewards/frontier_ece_reward": 0.016775081306695937,
"rewards/frontier_entropy_batch_reward": -0.19491543173789977,
"signal/accuracy_reward/centered_abs_mean": 0.10306396484375,
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
"signal/accuracy_reward/group_std_mean": 0.13901238441467284,
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051531982421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051531982421875,
"signal/advantage_abs_mean": 0.07275230437517166,
"signal/advantage_pre_scale_abs_mean": 0.07275230437517166,
"signal/advantage_pre_scale_std": 0.11130416691303253,
"signal/advantage_std": 0.11130416691303253,
"signal/brier_reward/centered_abs_mean": 0.15814976394176483,
"signal/brier_reward/group_bin_occupancy": 0.862890625,
"signal/brier_reward/group_std_mean": 0.20069519579410552,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01581497713923454,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01581497713923454,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013001594133675099,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91015625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01670000497251749,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013001594459638,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013001594459638,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019870033720508218,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7640625,
"signal/frontier_aurc_reward/group_std_mean": 0.0030117711983621122,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4837543605826795e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4837543605826795e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.2016854852437973,
"signal/frontier_coverage_0/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_0/group_std_mean": 0.2578335404396057,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_1/centered_abs_mean": 0.2016854852437973,
"signal/frontier_coverage_1/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_1/group_std_mean": 0.2578335404396057,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_10/centered_abs_mean": 0.2016854852437973,
"signal/frontier_coverage_10/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_10/group_std_mean": 0.2578335404396057,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_15/centered_abs_mean": 0.2016854852437973,
"signal/frontier_coverage_15/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_15/group_std_mean": 0.2578335404396057,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_20/centered_abs_mean": 0.2016854852437973,
"signal/frontier_coverage_20/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_20/group_std_mean": 0.2578335404396057,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_25/centered_abs_mean": 0.2016854852437973,
"signal/frontier_coverage_25/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_25/group_std_mean": 0.2578335404396057,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_5/centered_abs_mean": 0.2016854852437973,
"signal/frontier_coverage_5/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_5/group_std_mean": 0.2578335404396057,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025210686959326266,
"signal/frontier_ece_reward/centered_abs_mean": 0.03453442975878716,
"signal/frontier_ece_reward/group_bin_occupancy": 0.797265625,
"signal/frontier_ece_reward/group_std_mean": 0.05071103274822235,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003453443106263876,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003453443106263876,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2709068328142166,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74609375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34899981021881105,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027090684697031974,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027090684697031974,
"step": 105
},
{
"calibration/aurc": 0.3341179136812372,
"calibration/batch_distribution_entropy": 0.9588299480996344,
"calibration/batch_entropy_100bins": 0.9429023940351428,
"calibration/batch_entropy_10bins": 0.9588299480996344,
"calibration/batch_entropy_50bins": 0.9597674673427136,
"calibration/batch_uniqueness": 0.9558685302734375,
"calibration/buffer_distribution_entropy": 0.9919930074210095,
"calibration/buffer_entropy_100bins": 0.9728636582987387,
"calibration/buffer_entropy_10bins": 0.9919930074210095,
"calibration/buffer_entropy_50bins": 0.9833901381550991,
"calibration/confidence_entropy": 0.4574315079144936,
"calibration/coverage@0%": 0.01953125,
"calibration/coverage@1%": 0.01953125,
"calibration/coverage@10%": 0.1015625,
"calibration/coverage@15%": 0.225390625,
"calibration/coverage@20%": 0.333203125,
"calibration/coverage@25%": 0.41015625,
"calibration/coverage@30%": 0.48515625,
"calibration/coverage@5%": 0.0265625,
"calibration/ece": 0.13175204310416827,
"calibration/mean_confidence": 0.44462494666585883,
"calibration/prompt_uniqueness": 0.865966796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 614.4,
"completions/max_terminated_length": 614.4,
"completions/mean_length": 162.67568359375,
"completions/mean_terminated_length": 162.67568359375,
"completions/min_length": 72.6,
"completions/min_terminated_length": 72.6,
"epoch": 0.352,
"grad_norm": 0.0011161722941324115,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 365523443.0,
"reward": 0.9019862651824951,
"reward_std": 0.09622626602649689,
"rewards/accuracy_reward": 0.47529296875,
"rewards/brier_reward": 0.7733848929405213,
"rewards/confidence_uniqueness_reward": 0.9622901916503906,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002903068531304598,
"rewards/frontier_coverage_0": 0.13595542460680007,
"rewards/frontier_coverage_1": 0.13595542460680007,
"rewards/frontier_coverage_10": 0.13595542460680007,
"rewards/frontier_coverage_15": 0.13595542460680007,
"rewards/frontier_coverage_20": 0.13595542460680007,
"rewards/frontier_coverage_25": 0.13595542460680007,
"rewards/frontier_coverage_5": 0.13595542460680007,
"rewards/frontier_ece_reward": 0.015835122019052506,
"rewards/frontier_entropy_batch_reward": -0.22671036124229432,
"signal/accuracy_reward/centered_abs_mean": 0.110052490234375,
"signal/accuracy_reward/group_bin_occupancy": 0.175390625,
"signal/accuracy_reward/group_std_mean": 0.14299680292606354,
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0550262451171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0550262451171875,
"signal/advantage_abs_mean": 0.07607890367507934,
"signal/advantage_pre_scale_abs_mean": 0.07607890367507934,
"signal/advantage_pre_scale_std": 0.11513545215129853,
"signal/advantage_std": 0.11513545215129853,
"signal/brier_reward/centered_abs_mean": 0.15754351615905762,
"signal/brier_reward/group_bin_occupancy": 0.85390625,
"signal/brier_reward/group_std_mean": 0.2009361833333969,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015754351764917372,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015754351764917372,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014543581008911132,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.884375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018578647449612618,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014543581288307904,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014543581288307904,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002373543428257108,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.753515625,
"signal/frontier_aurc_reward/group_std_mean": 0.0036731195170432327,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.966929350805003e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.966929350805003e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.20907978415489198,
"signal/frontier_coverage_0/group_bin_occupancy": 0.88671875,
"signal/frontier_coverage_0/group_std_mean": 0.2643455803394318,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_1/centered_abs_mean": 0.20907978415489198,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88671875,
"signal/frontier_coverage_1/group_std_mean": 0.2643455803394318,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_10/centered_abs_mean": 0.20907978415489198,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88671875,
"signal/frontier_coverage_10/group_std_mean": 0.2643455803394318,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_15/centered_abs_mean": 0.20907978415489198,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88671875,
"signal/frontier_coverage_15/group_std_mean": 0.2643455803394318,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_20/centered_abs_mean": 0.20907978415489198,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88671875,
"signal/frontier_coverage_20/group_std_mean": 0.2643455803394318,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_25/centered_abs_mean": 0.20907978415489198,
"signal/frontier_coverage_25/group_bin_occupancy": 0.88671875,
"signal/frontier_coverage_25/group_std_mean": 0.2643455803394318,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_5/centered_abs_mean": 0.20907978415489198,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88671875,
"signal/frontier_coverage_5/group_std_mean": 0.2643455803394318,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026134973857551815,
"signal/frontier_ece_reward/centered_abs_mean": 0.03194341510534286,
"signal/frontier_ece_reward/group_bin_occupancy": 0.793359375,
"signal/frontier_ece_reward/group_std_mean": 0.04675339683890343,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003194341529160738,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003194341529160738,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29024515151977537,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74296875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36637923717498777,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029024516791105272,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029024516791105272,
"step": 110
},
{
"calibration/aurc": 0.3880178279199678,
"calibration/batch_distribution_entropy": 0.976342214061056,
"calibration/batch_entropy_100bins": 0.947550732636436,
"calibration/batch_entropy_10bins": 0.976342214061056,
"calibration/batch_entropy_50bins": 0.9675931845241565,
"calibration/batch_uniqueness": 0.9630584716796875,
"calibration/buffer_distribution_entropy": 0.9963760938673006,
"calibration/buffer_entropy_100bins": 0.9822785458994316,
"calibration/buffer_entropy_10bins": 0.9963760938673006,
"calibration/buffer_entropy_50bins": 0.9900233021144178,
"calibration/confidence_entropy": 0.4652751317872042,
"calibration/coverage@0%": 0.00859375,
"calibration/coverage@1%": 0.00859375,
"calibration/coverage@10%": 0.043359375,
"calibration/coverage@15%": 0.08515625,
"calibration/coverage@20%": 0.165234375,
"calibration/coverage@25%": 0.320703125,
"calibration/coverage@30%": 0.365234375,
"calibration/coverage@5%": 0.019921875,
"calibration/ece": 0.1469109474352704,
"calibration/mean_confidence": 0.5193943627209796,
"calibration/prompt_uniqueness": 0.88134765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 970.2,
"completions/max_terminated_length": 616.4,
"completions/mean_length": 163.323828125,
"completions/mean_terminated_length": 163.05493774414063,
"completions/min_length": 70.4,
"completions/min_terminated_length": 70.4,
"epoch": 0.368,
"grad_norm": 0.0010371602838858962,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 382261351.0,
"reward": 0.9131480097770691,
"reward_std": 0.09592696875333787,
"rewards/accuracy_reward": 0.5,
"rewards/brier_reward": 0.771314287185669,
"rewards/confidence_uniqueness_reward": 0.9649186968803406,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0033288683742284777,
"rewards/frontier_coverage_0": 0.1177740141749382,
"rewards/frontier_coverage_1": 0.1177740141749382,
"rewards/frontier_coverage_10": 0.1177740141749382,
"rewards/frontier_coverage_15": 0.1177740141749382,
"rewards/frontier_coverage_20": 0.1177740141749382,
"rewards/frontier_coverage_25": 0.1177740141749382,
"rewards/frontier_coverage_5": 0.1177740141749382,
"rewards/frontier_ece_reward": 0.014998926036059856,
"rewards/frontier_entropy_batch_reward": -0.22141122221946716,
"signal/accuracy_reward/centered_abs_mean": 0.1026611328125,
"signal/accuracy_reward/group_bin_occupancy": 0.1734375,
"signal/accuracy_reward/group_std_mean": 0.13475327789783478,
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05133056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05133056640625,
"signal/advantage_abs_mean": 0.07525163143873215,
"signal/advantage_pre_scale_abs_mean": 0.07525163143873215,
"signal/advantage_pre_scale_std": 0.11405473798513413,
"signal/advantage_std": 0.11405473798513413,
"signal/brier_reward/centered_abs_mean": 0.1544673502445221,
"signal/brier_reward/group_bin_occupancy": 0.859375,
"signal/brier_reward/group_std_mean": 0.19688616693019867,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015446734987199307,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.015446734987199307,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013924498483538628,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.863671875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01840968318283558,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001392449880950153,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001392449880950153,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003003736166283488,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.746484375,
"signal/frontier_aurc_reward/group_std_mean": 0.004610391240566969,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7546701423707415e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7546701423707415e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19367235004901887,
"signal/frontier_coverage_0/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_0/group_std_mean": 0.24632689356803894,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_1/centered_abs_mean": 0.19367235004901887,
"signal/frontier_coverage_1/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_1/group_std_mean": 0.24632689356803894,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_10/centered_abs_mean": 0.19367235004901887,
"signal/frontier_coverage_10/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_10/group_std_mean": 0.24632689356803894,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_15/centered_abs_mean": 0.19367235004901887,
"signal/frontier_coverage_15/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_15/group_std_mean": 0.24632689356803894,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_20/centered_abs_mean": 0.19367235004901887,
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_20/group_std_mean": 0.24632689356803894,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_25/centered_abs_mean": 0.19367235004901887,
"signal/frontier_coverage_25/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_25/group_std_mean": 0.24632689356803894,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_5/centered_abs_mean": 0.19367235004901887,
"signal/frontier_coverage_5/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_5/group_std_mean": 0.24632689356803894,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024209044873714446,
"signal/frontier_ece_reward/centered_abs_mean": 0.03021877408027649,
"signal/frontier_ece_reward/group_bin_occupancy": 0.82109375,
"signal/frontier_ece_reward/group_std_mean": 0.042821260541677474,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030218774918466806,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030218774918466806,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2965745747089386,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37327985763549804,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029657458886504173,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029657458886504173,
"step": 115
},
{
"calibration/aurc": 0.3511573014841358,
"calibration/batch_distribution_entropy": 0.9747074938964195,
"calibration/batch_entropy_100bins": 0.9529573262726035,
"calibration/batch_entropy_10bins": 0.9747074938964195,
"calibration/batch_entropy_50bins": 0.9709856182741701,
"calibration/batch_uniqueness": 0.961004638671875,
"calibration/buffer_distribution_entropy": 0.9985135010971646,
"calibration/buffer_entropy_100bins": 0.9882330495239049,
"calibration/buffer_entropy_10bins": 0.9985135010971646,
"calibration/buffer_entropy_50bins": 0.99400929049607,
"calibration/confidence_entropy": 0.4714483178300538,
"calibration/coverage@0%": 0.0078125,
"calibration/coverage@1%": 0.0078125,
"calibration/coverage@10%": 0.135546875,
"calibration/coverage@15%": 0.205859375,
"calibration/coverage@20%": 0.29765625,
"calibration/coverage@25%": 0.34453125,
"calibration/coverage@30%": 0.398828125,
"calibration/coverage@5%": 0.04921875,
"calibration/ece": 0.1429656466704134,
"calibration/mean_confidence": 0.4671164953742596,
"calibration/prompt_uniqueness": 0.870458984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 953.2,
"completions/max_terminated_length": 579.2,
"completions/mean_length": 162.4927734375,
"completions/mean_terminated_length": 162.2256286621094,
"completions/min_length": 69.4,
"completions/min_terminated_length": 69.4,
"epoch": 0.384,
"grad_norm": 0.0010119343642145395,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 398781789.0,
"reward": 0.9290378093719482,
"reward_std": 0.09455136507749558,
"rewards/accuracy_reward": 0.530859375,
"rewards/brier_reward": 0.7825330376625061,
"rewards/confidence_uniqueness_reward": 0.9647130966186523,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003093740437179804,
"rewards/frontier_coverage_0": 0.10442648828029633,
"rewards/frontier_coverage_1": 0.10442648828029633,
"rewards/frontier_coverage_10": 0.10442648828029633,
"rewards/frontier_coverage_15": 0.10442648828029633,
"rewards/frontier_coverage_20": 0.10442648828029633,
"rewards/frontier_coverage_25": 0.10442648828029633,
"rewards/frontier_coverage_5": 0.10442648828029633,
"rewards/frontier_ece_reward": 0.013694177567958831,
"rewards/frontier_entropy_batch_reward": -0.21486915349960328,
"signal/accuracy_reward/centered_abs_mean": 0.10360107421875,
"signal/accuracy_reward/group_bin_occupancy": 0.1765625,
"signal/accuracy_reward/group_std_mean": 0.13945001363754272,
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051800537109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051800537109375,
"signal/advantage_abs_mean": 0.07290669530630112,
"signal/advantage_pre_scale_abs_mean": 0.07290669530630112,
"signal/advantage_pre_scale_std": 0.11290555596351623,
"signal/advantage_std": 0.11290555596351623,
"signal/brier_reward/centered_abs_mean": 0.13870272636413575,
"signal/brier_reward/group_bin_occupancy": 0.841796875,
"signal/brier_reward/group_std_mean": 0.1802999347448349,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013870272599160672,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013870272599160672,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013296573236584664,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.883203125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017479157820343972,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013296573655679823,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013296573655679823,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002974971802905202,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73203125,
"signal/frontier_aurc_reward/group_std_mean": 0.004687594994902611,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.718714833667036e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.718714833667036e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18035527765750886,
"signal/frontier_coverage_0/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_0/group_std_mean": 0.2348244309425354,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_1/centered_abs_mean": 0.18035527765750886,
"signal/frontier_coverage_1/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_1/group_std_mean": 0.2348244309425354,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_10/centered_abs_mean": 0.18035527765750886,
"signal/frontier_coverage_10/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_10/group_std_mean": 0.2348244309425354,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_15/centered_abs_mean": 0.18035527765750886,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_15/group_std_mean": 0.2348244309425354,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_20/centered_abs_mean": 0.18035527765750886,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_20/group_std_mean": 0.2348244309425354,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_25/centered_abs_mean": 0.18035527765750886,
"signal/frontier_coverage_25/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_25/group_std_mean": 0.2348244309425354,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_5/centered_abs_mean": 0.18035527765750886,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_5/group_std_mean": 0.2348244309425354,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022544410079717637,
"signal/frontier_ece_reward/centered_abs_mean": 0.02462676987051964,
"signal/frontier_ece_reward/group_bin_occupancy": 0.832421875,
"signal/frontier_ece_reward/group_std_mean": 0.03513662964105606,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002462677052244544,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002462677052244544,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28050180673599245,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35507087111473085,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02805018164217472,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02805018164217472,
"step": 120
},
{
"calibration/aurc": 0.42825535009099047,
"calibration/batch_distribution_entropy": 0.9843865680153211,
"calibration/batch_entropy_100bins": 0.9538506842868667,
"calibration/batch_entropy_10bins": 0.9843865680153211,
"calibration/batch_entropy_50bins": 0.9709620915826056,
"calibration/batch_uniqueness": 0.9653472900390625,
"calibration/buffer_distribution_entropy": 0.9992208380784964,
"calibration/buffer_entropy_100bins": 0.9907419869998911,
"calibration/buffer_entropy_10bins": 0.9992208380784964,
"calibration/buffer_entropy_50bins": 0.9958122546116293,
"calibration/confidence_entropy": 0.5159266248918248,
"calibration/coverage@0%": 0.00390625,
"calibration/coverage@1%": 0.00390625,
"calibration/coverage@10%": 0.010546875,
"calibration/coverage@15%": 0.016796875,
"calibration/coverage@20%": 0.04375,
"calibration/coverage@25%": 0.080859375,
"calibration/coverage@30%": 0.129296875,
"calibration/coverage@5%": 0.00390625,
"calibration/ece": 0.14866836772807127,
"calibration/mean_confidence": 0.512600992886363,
"calibration/prompt_uniqueness": 0.88505859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 912.2,
"completions/max_terminated_length": 671.4,
"completions/mean_length": 163.5146484375,
"completions/mean_terminated_length": 163.24703369140624,
"completions/min_length": 72.4,
"completions/min_terminated_length": 72.4,
"epoch": 0.4,
"grad_norm": 0.0015616186428815126,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 415492627.0,
"reward": 0.9132670998573303,
"reward_std": 0.10271851271390915,
"rewards/accuracy_reward": 0.50390625,
"rewards/brier_reward": 0.7699137806892395,
"rewards/confidence_uniqueness_reward": 0.9647279858589173,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.003537365049123764,
"rewards/frontier_coverage_0": 0.10254341214895249,
"rewards/frontier_coverage_1": 0.10254341214895249,
"rewards/frontier_coverage_10": 0.10254341214895249,
"rewards/frontier_coverage_15": 0.10254341214895249,
"rewards/frontier_coverage_20": 0.10254341214895249,
"rewards/frontier_coverage_25": 0.10254341214895249,
"rewards/frontier_coverage_5": 0.10254341214895249,
"rewards/frontier_ece_reward": 0.010206561535596848,
"rewards/frontier_entropy_batch_reward": -0.21855055093765258,
"signal/accuracy_reward/centered_abs_mean": 0.11923828125,
"signal/accuracy_reward/group_bin_occupancy": 0.180859375,
"signal/accuracy_reward/group_std_mean": 0.15680868923664093,
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059619140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.059619140625,
"signal/advantage_abs_mean": 0.07999172508716583,
"signal/advantage_pre_scale_abs_mean": 0.07999172508716583,
"signal/advantage_pre_scale_std": 0.1227585643529892,
"signal/advantage_std": 0.1227585643529892,
"signal/brier_reward/centered_abs_mean": 0.1432872533798218,
"signal/brier_reward/group_bin_occupancy": 0.874609375,
"signal/brier_reward/group_std_mean": 0.1836364448070526,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014328726008534432,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014328726008534432,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01301488820463419,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87578125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018223760277032854,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013014888390898705,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013014888390898705,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033591561019420623,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7171875,
"signal/frontier_aurc_reward/group_std_mean": 0.0054647172801196575,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1989451710833235e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1989451710833235e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1780136674642563,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_0/group_std_mean": 0.2294588565826416,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_1/centered_abs_mean": 0.1780136674642563,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_1/group_std_mean": 0.2294588565826416,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_10/centered_abs_mean": 0.1780136674642563,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_10/group_std_mean": 0.2294588565826416,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_15/centered_abs_mean": 0.1780136674642563,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_15/group_std_mean": 0.2294588565826416,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_20/centered_abs_mean": 0.1780136674642563,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_20/group_std_mean": 0.2294588565826416,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_25/centered_abs_mean": 0.1780136674642563,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_25/group_std_mean": 0.2294588565826416,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_5/centered_abs_mean": 0.1780136674642563,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_5/group_std_mean": 0.2294588565826416,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022251708433032036,
"signal/frontier_ece_reward/centered_abs_mean": 0.020605326071381568,
"signal/frontier_ece_reward/group_bin_occupancy": 0.83828125,
"signal/frontier_ece_reward/group_std_mean": 0.029997162893414496,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020605326164513825,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020605326164513825,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2866648018360138,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751171875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36136451959609983,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028666481375694275,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028666481375694275,
"step": 125
},
{
"calibration/aurc": 0.32033459954652566,
"calibration/batch_distribution_entropy": 0.9757990834357446,
"calibration/batch_entropy_100bins": 0.9516993202749005,
"calibration/batch_entropy_10bins": 0.9757990834357446,
"calibration/batch_entropy_50bins": 0.969603238359294,
"calibration/batch_uniqueness": 0.964125328319047,
"calibration/buffer_distribution_entropy": 0.9992577401022663,
"calibration/buffer_entropy_100bins": 0.990822037253148,
"calibration/buffer_entropy_10bins": 0.9992577401022663,
"calibration/buffer_entropy_50bins": 0.9958939450746145,
"calibration/confidence_entropy": 0.5194667423516511,
"calibration/coverage@0%": 0.0027366682974559685,
"calibration/coverage@1%": 0.0027366682974559685,
"calibration/coverage@10%": 0.025431139921722114,
"calibration/coverage@15%": 0.092284582925636,
"calibration/coverage@20%": 0.14117158721365258,
"calibration/coverage@25%": 0.2542489735620276,
"calibration/coverage@30%": 0.43019097621925484,
"calibration/coverage@5%": 0.0027366682974559685,
"calibration/ece": 0.11552021771290169,
"calibration/mean_confidence": 0.5210242359988329,
"calibration/prompt_uniqueness": 0.8861906174575818,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00087890625,
"completions/max_length": 1388.2,
"completions/max_terminated_length": 1059.2,
"completions/mean_length": 172.62021484375,
"completions/mean_terminated_length": 171.42176208496093,
"completions/min_length": 73.6,
"completions/min_terminated_length": 73.6,
"epoch": 0.416,
"grad_norm": 0.0010845692595466971,
"learning_rate": 1e-06,
"loss": 0.0021,
"num_tokens": 432141442.0,
"reward": 0.9234651088714599,
"reward_std": 0.10108603686094284,
"rewards/accuracy_reward": 0.518359375,
"rewards/brier_reward": 0.7800793528556824,
"rewards/confidence_uniqueness_reward": 0.9633147358894348,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.00315277217887342,
"rewards/frontier_coverage_0": 0.10128591805696488,
"rewards/frontier_coverage_1": 0.10128591805696488,
"rewards/frontier_coverage_10": 0.10128591805696488,
"rewards/frontier_coverage_15": 0.10128591805696488,
"rewards/frontier_coverage_20": 0.10128591805696488,
"rewards/frontier_coverage_25": 0.10118604749441147,
"rewards/frontier_coverage_5": 0.10128591805696488,
"rewards/frontier_ece_reward": 0.01040429063141346,
"rewards/frontier_entropy_batch_reward": -0.1947682112455368,
"signal/accuracy_reward/centered_abs_mean": 0.11812744140625,
"signal/accuracy_reward/group_bin_occupancy": 0.178515625,
"signal/accuracy_reward/group_std_mean": 0.15311342775821685,
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059063720703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.059063720703125,
"signal/advantage_abs_mean": 0.07872487008571624,
"signal/advantage_pre_scale_abs_mean": 0.07872487008571624,
"signal/advantage_pre_scale_std": 0.1202880859375,
"signal/advantage_std": 0.1202880859375,
"signal/brier_reward/centered_abs_mean": 0.14082336127758027,
"signal/brier_reward/group_bin_occupancy": 0.875,
"signal/brier_reward/group_std_mean": 0.17933386862277984,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01408233605325222,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01408233605325222,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013442078977823258,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.888671875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01916816532611847,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013442079536616803,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013442079536616803,
"signal/format_reward/centered_abs_mean": 0.001678466796875,
"signal/format_reward/group_bin_occupancy": 0.127734375,
"signal/format_reward/group_std_mean": 0.004299227613955736,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008392333984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008392333984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031103747431188824,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71796875,
"signal/frontier_aurc_reward/group_std_mean": 0.005056559341028333,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.887968414346687e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.887968414346687e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18912857472896577,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_0/group_std_mean": 0.23899484276771546,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_1/centered_abs_mean": 0.18912857472896577,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_1/group_std_mean": 0.23899484276771546,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_10/centered_abs_mean": 0.18912857472896577,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_10/group_std_mean": 0.23899484276771546,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_15/centered_abs_mean": 0.18912857472896577,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_15/group_std_mean": 0.23899484276771546,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_20/centered_abs_mean": 0.18912857472896577,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_20/group_std_mean": 0.23899484276771546,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_25/centered_abs_mean": 0.18864355981349945,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_25/group_std_mean": 0.2383899211883545,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023580444511026146,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023580444511026146,
"signal/frontier_coverage_5/centered_abs_mean": 0.18912857472896577,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8875,
"signal/frontier_coverage_5/group_std_mean": 0.23899484276771546,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002364107267931104,
"signal/frontier_ece_reward/centered_abs_mean": 0.018303705751895903,
"signal/frontier_ece_reward/group_bin_occupancy": 0.843359375,
"signal/frontier_ece_reward/group_std_mean": 0.026779073104262353,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001830370631068945,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001830370631068945,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27329595685005187,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73984375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3500793755054474,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027329596504569054,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027329596504569054,
"step": 130
},
{
"calibration/aurc": 0.2711466896875542,
"calibration/batch_distribution_entropy": 0.9824704911633383,
"calibration/batch_entropy_100bins": 0.957537800410772,
"calibration/batch_entropy_10bins": 0.9824704911633383,
"calibration/batch_entropy_50bins": 0.9750580937567286,
"calibration/batch_uniqueness": 0.964057967535398,
"calibration/buffer_distribution_entropy": 0.9990826808437717,
"calibration/buffer_entropy_100bins": 0.9903883943102378,
"calibration/buffer_entropy_10bins": 0.9990826808437717,
"calibration/buffer_entropy_50bins": 0.9955856525305034,
"calibration/confidence_entropy": 0.4754514819609604,
"calibration/coverage@0%": 0.005876225490196079,
"calibration/coverage@1%": 0.005876225490196079,
"calibration/coverage@10%": 0.07948835784313726,
"calibration/coverage@15%": 0.22814797794117644,
"calibration/coverage@20%": 0.3243229166666667,
"calibration/coverage@25%": 0.42829197303921573,
"calibration/coverage@30%": 0.5064721200980392,
"calibration/coverage@5%": 0.024699754901960785,
"calibration/ece": 0.11323366571529961,
"calibration/mean_confidence": 0.5354937992420586,
"calibration/prompt_uniqueness": 0.8668369638560094,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 948.8,
"completions/max_terminated_length": 640.8,
"completions/mean_length": 176.71396484375,
"completions/mean_terminated_length": 176.18282775878907,
"completions/min_length": 83.8,
"completions/min_terminated_length": 83.8,
"epoch": 0.432,
"grad_norm": 0.0012650451390072703,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 448965329.0,
"reward": 0.9427853345870971,
"reward_std": 0.09141346216201782,
"rewards/accuracy_reward": 0.5541015625,
"rewards/brier_reward": 0.795646071434021,
"rewards/confidence_uniqueness_reward": 0.9624568223953247,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.002685644570738077,
"rewards/frontier_coverage_0": 0.10276300571858883,
"rewards/frontier_coverage_1": 0.10276300571858883,
"rewards/frontier_coverage_10": 0.10276300571858883,
"rewards/frontier_coverage_15": 0.10276300571858883,
"rewards/frontier_coverage_20": 0.10276300571858883,
"rewards/frontier_coverage_25": 0.10133399069309235,
"rewards/frontier_coverage_5": 0.10276300571858883,
"rewards/frontier_ece_reward": 0.012095463648438453,
"rewards/frontier_entropy_batch_reward": -0.2003028452396393,
"signal/accuracy_reward/centered_abs_mean": 0.10240478515625,
"signal/accuracy_reward/group_bin_occupancy": 0.173046875,
"signal/accuracy_reward/group_std_mean": 0.13419998735189437,
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051202392578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051202392578125,
"signal/advantage_abs_mean": 0.07144368439912796,
"signal/advantage_pre_scale_abs_mean": 0.07144368439912796,
"signal/advantage_pre_scale_std": 0.11116426140069961,
"signal/advantage_std": 0.11116426140069961,
"signal/brier_reward/centered_abs_mean": 0.1337427169084549,
"signal/brier_reward/group_bin_occupancy": 0.84296875,
"signal/brier_reward/group_std_mean": 0.171070197224617,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013374271430075168,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013374271430075168,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013302310928702354,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.88203125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01829577349126339,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001330231106840074,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001330231106840074,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086068242787,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003123843017965555,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.709765625,
"signal/frontier_aurc_reward/group_std_mean": 0.0052942352835088965,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.904803670593537e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.904803670593537e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1775657594203949,
"signal/frontier_coverage_0/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_0/group_std_mean": 0.22634563744068145,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_1/centered_abs_mean": 0.1775657594203949,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_1/group_std_mean": 0.22634563744068145,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_10/centered_abs_mean": 0.1775657594203949,
"signal/frontier_coverage_10/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_10/group_std_mean": 0.22634563744068145,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_15/centered_abs_mean": 0.1775657594203949,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_15/group_std_mean": 0.22634563744068145,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_20/centered_abs_mean": 0.1775657594203949,
"signal/frontier_coverage_20/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_20/group_std_mean": 0.22634563744068145,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_25/centered_abs_mean": 0.17443813383579254,
"signal/frontier_coverage_25/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_25/group_std_mean": 0.22237459123134612,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021804766729474068,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021804766729474068,
"signal/frontier_coverage_5/centered_abs_mean": 0.1775657594203949,
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_5/group_std_mean": 0.22634563744068145,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022195718716830014,
"signal/frontier_ece_reward/centered_abs_mean": 0.017705311998724937,
"signal/frontier_ece_reward/group_bin_occupancy": 0.836328125,
"signal/frontier_ece_reward/group_std_mean": 0.025692766532301903,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017705312930047512,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017705312930047512,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27649489641189573,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3526521801948547,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027649490535259245,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027649490535259245,
"step": 135
},
{
"calibration/aurc": 0.3010584308084431,
"calibration/batch_distribution_entropy": 0.9641241091381325,
"calibration/batch_entropy_100bins": 0.9505054970417863,
"calibration/batch_entropy_10bins": 0.9641241091381325,
"calibration/batch_entropy_50bins": 0.9653212354537498,
"calibration/batch_uniqueness": 0.9634041782070479,
"calibration/buffer_distribution_entropy": 0.9983903237821101,
"calibration/buffer_entropy_100bins": 0.9894302549349414,
"calibration/buffer_entropy_10bins": 0.9983903237821101,
"calibration/buffer_entropy_50bins": 0.9949646771618358,
"calibration/confidence_entropy": 0.5043917074884562,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.032421875,
"calibration/coverage@15%": 0.092578125,
"calibration/coverage@20%": 0.1625,
"calibration/coverage@25%": 0.28093428938356163,
"calibration/coverage@30%": 0.4881788160469667,
"calibration/coverage@5%": 0.00546875,
"calibration/ece": 0.13595115217602646,
"calibration/mean_confidence": 0.5649633774254619,
"calibration/prompt_uniqueness": 0.8865723164347035,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 563.2,
"completions/mean_length": 187.29921875,
"completions/mean_terminated_length": 186.64035034179688,
"completions/min_length": 76.4,
"completions/min_terminated_length": 76.4,
"epoch": 0.448,
"grad_norm": 0.00112549914047122,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 465836073.0,
"reward": 0.9279234051704407,
"reward_std": 0.09372627437114715,
"rewards/accuracy_reward": 0.52568359375,
"rewards/brier_reward": 0.7868773221969605,
"rewards/confidence_uniqueness_reward": 0.9617484331130981,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0032899423968046904,
"rewards/frontier_coverage_0": 0.10912051647901536,
"rewards/frontier_coverage_1": 0.10912051647901536,
"rewards/frontier_coverage_10": 0.10912051647901536,
"rewards/frontier_coverage_15": 0.10912051647901536,
"rewards/frontier_coverage_20": 0.10912051647901536,
"rewards/frontier_coverage_25": 0.10755196064710618,
"rewards/frontier_coverage_5": 0.10912051647901536,
"rewards/frontier_ece_reward": 0.010184999741613864,
"rewards/frontier_entropy_batch_reward": -0.19944992065429687,
"signal/accuracy_reward/centered_abs_mean": 0.107452392578125,
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
"signal/accuracy_reward/group_std_mean": 0.1377037927508354,
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0537261962890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0537261962890625,
"signal/advantage_abs_mean": 0.0733189657330513,
"signal/advantage_pre_scale_abs_mean": 0.0733189657330513,
"signal/advantage_pre_scale_std": 0.11423833519220353,
"signal/advantage_std": 0.11423833519220353,
"signal/brier_reward/centered_abs_mean": 0.14163122177124024,
"signal/brier_reward/group_bin_occupancy": 0.85234375,
"signal/brier_reward/group_std_mean": 0.18140933215618132,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01416312251240015,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01416312251240015,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013178028725087642,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.90078125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01842593662440777,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013178028631955386,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013178028631955386,
"signal/format_reward/centered_abs_mean": 0.001312255859375,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0035306816920638085,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003526174183934927,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.690234375,
"signal/frontier_aurc_reward/group_std_mean": 0.006031551398336887,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.40771778812632e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.40771778812632e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18577166497707367,
"signal/frontier_coverage_0/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_0/group_std_mean": 0.23553779423236848,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_1/centered_abs_mean": 0.18577166497707367,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_1/group_std_mean": 0.23553779423236848,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_10/centered_abs_mean": 0.18577166497707367,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_10/group_std_mean": 0.23553779423236848,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_15/centered_abs_mean": 0.18577166497707367,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_15/group_std_mean": 0.23553779423236848,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_20/centered_abs_mean": 0.18577166497707367,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_20/group_std_mean": 0.23553779423236848,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_25/centered_abs_mean": 0.17763153314590455,
"signal/frontier_coverage_25/group_bin_occupancy": 0.878125,
"signal/frontier_coverage_25/group_std_mean": 0.22553324997425078,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022203943226486444,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022203943226486444,
"signal/frontier_coverage_5/centered_abs_mean": 0.18577166497707367,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_5/group_std_mean": 0.23553779423236848,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023221459705382586,
"signal/frontier_ece_reward/centered_abs_mean": 0.017202311754226686,
"signal/frontier_ece_reward/group_bin_occupancy": 0.82265625,
"signal/frontier_ece_reward/group_std_mean": 0.025163047760725022,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017202311893925071,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017202311893925071,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2695195287466049,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34306603074073794,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026951952651143075,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026951952651143075,
"step": 140
},
{
"calibration/aurc": 0.3905655690563325,
"calibration/batch_distribution_entropy": 0.9803796350881079,
"calibration/batch_entropy_100bins": 0.960040144438605,
"calibration/batch_entropy_10bins": 0.9803796350881079,
"calibration/batch_entropy_50bins": 0.9728294233983587,
"calibration/batch_uniqueness": 0.9602142333984375,
"calibration/buffer_distribution_entropy": 0.9979704423206602,
"calibration/buffer_entropy_100bins": 0.988638040636473,
"calibration/buffer_entropy_10bins": 0.9979704423206602,
"calibration/buffer_entropy_50bins": 0.9945346302719053,
"calibration/confidence_entropy": 0.5143790373022907,
"calibration/coverage@0%": 0.008984375,
"calibration/coverage@1%": 0.008984375,
"calibration/coverage@10%": 0.016015625,
"calibration/coverage@15%": 0.03046875,
"calibration/coverage@20%": 0.125,
"calibration/coverage@25%": 0.183984375,
"calibration/coverage@30%": 0.294921875,
"calibration/coverage@5%": 0.008984375,
"calibration/ece": 0.13917155590776573,
"calibration/mean_confidence": 0.5079795467692557,
"calibration/prompt_uniqueness": 0.87216796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1347.4,
"completions/max_terminated_length": 1031.8,
"completions/mean_length": 191.52421875,
"completions/mean_terminated_length": 190.86587219238282,
"completions/min_length": 85.0,
"completions/min_terminated_length": 85.0,
"epoch": 0.464,
"grad_norm": 0.0007277204422280192,
"learning_rate": 1e-06,
"loss": 0.0015,
"num_tokens": 482968097.0,
"reward": 0.8982602834701539,
"reward_std": 0.08826989978551865,
"rewards/accuracy_reward": 0.46787109375,
"rewards/brier_reward": 0.7684149622917176,
"rewards/confidence_uniqueness_reward": 0.959271764755249,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0039596253540366885,
"rewards/frontier_coverage_0": 0.1321122795343399,
"rewards/frontier_coverage_1": 0.1321122795343399,
"rewards/frontier_coverage_10": 0.1321122795343399,
"rewards/frontier_coverage_15": 0.1321122795343399,
"rewards/frontier_coverage_20": 0.1321122795343399,
"rewards/frontier_coverage_25": 0.12441358044743538,
"rewards/frontier_coverage_5": 0.1321122795343399,
"rewards/frontier_ece_reward": 0.007002122979611158,
"rewards/frontier_entropy_batch_reward": -0.2031411647796631,
"signal/accuracy_reward/centered_abs_mean": 0.085882568359375,
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
"signal/accuracy_reward/group_std_mean": 0.1213410884141922,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0429412841796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0429412841796875,
"signal/advantage_abs_mean": 0.06655814126133919,
"signal/advantage_pre_scale_abs_mean": 0.06655814126133919,
"signal/advantage_pre_scale_std": 0.10574809014797211,
"signal/advantage_std": 0.10574809014797211,
"signal/brier_reward/centered_abs_mean": 0.14038530886173248,
"signal/brier_reward/group_bin_occupancy": 0.861328125,
"signal/brier_reward/group_std_mean": 0.1817769706249237,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014038531482219696,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014038531482219696,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013213860616087914,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.916796875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018184344843029977,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013213861035183071,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013213861035183071,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036213297862559557,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.691796875,
"signal/frontier_aurc_reward/group_std_mean": 0.006112007796764374,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.526662451098673e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.526662451098673e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17165872752666472,
"signal/frontier_coverage_0/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_0/group_std_mean": 0.22206704020500184,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_1/centered_abs_mean": 0.17165872752666472,
"signal/frontier_coverage_1/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_1/group_std_mean": 0.22206704020500184,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_10/centered_abs_mean": 0.17165872752666472,
"signal/frontier_coverage_10/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_10/group_std_mean": 0.22206704020500184,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_15/centered_abs_mean": 0.17165872752666472,
"signal/frontier_coverage_15/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_15/group_std_mean": 0.22206704020500184,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_20/centered_abs_mean": 0.17165872752666472,
"signal/frontier_coverage_20/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_20/group_std_mean": 0.22206704020500184,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_25/centered_abs_mean": 0.16017631590366363,
"signal/frontier_coverage_25/group_bin_occupancy": 0.87734375,
"signal/frontier_coverage_25/group_std_mean": 0.20728962421417235,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020022039767354726,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020022039767354726,
"signal/frontier_coverage_5/centered_abs_mean": 0.17165872752666472,
"signal/frontier_coverage_5/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_5/group_std_mean": 0.22206704020500184,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002145734056830406,
"signal/frontier_ece_reward/centered_abs_mean": 0.01575020458549261,
"signal/frontier_ece_reward/group_bin_occupancy": 0.818359375,
"signal/frontier_ece_reward/group_std_mean": 0.02388475425541401,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015750204911455512,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015750204911455512,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2742986440658569,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7421875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34781610369682314,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027429865673184395,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027429865673184395,
"step": 145
},
{
"calibration/aurc": 0.2997569453896353,
"calibration/batch_distribution_entropy": 0.9792118212906589,
"calibration/batch_entropy_100bins": 0.9606337149903614,
"calibration/batch_entropy_10bins": 0.9792118212906589,
"calibration/batch_entropy_50bins": 0.9712305855926182,
"calibration/batch_uniqueness": 0.9567291259765625,
"calibration/buffer_distribution_entropy": 0.9978132956107622,
"calibration/buffer_entropy_100bins": 0.9881803114059542,
"calibration/buffer_entropy_10bins": 0.9978132956107622,
"calibration/buffer_entropy_50bins": 0.9942871392551943,
"calibration/confidence_entropy": 0.4884862201915487,
"calibration/coverage@0%": 0.0109375,
"calibration/coverage@1%": 0.0109375,
"calibration/coverage@10%": 0.016015625,
"calibration/coverage@15%": 0.1421875,
"calibration/coverage@20%": 0.287109375,
"calibration/coverage@25%": 0.38203125,
"calibration/coverage@30%": 0.556640625,
"calibration/coverage@5%": 0.0125,
"calibration/ece": 0.13674428667727873,
"calibration/mean_confidence": 0.509401226049053,
"calibration/prompt_uniqueness": 0.86796875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 947.6,
"completions/mean_length": 194.65380859375,
"completions/mean_terminated_length": 193.9985321044922,
"completions/min_length": 86.8,
"completions/min_terminated_length": 86.8,
"epoch": 0.48,
"grad_norm": 0.0011819824576377869,
"learning_rate": 1e-06,
"loss": 0.0015,
"num_tokens": 500009384.0,
"reward": 0.9241943120956421,
"reward_std": 0.09635329693555832,
"rewards/accuracy_reward": 0.51845703125,
"rewards/brier_reward": 0.7792062282562255,
"rewards/confidence_uniqueness_reward": 0.9568382143974304,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0031862443778663875,
"rewards/frontier_coverage_0": 0.11428727954626083,
"rewards/frontier_coverage_1": 0.11428727954626083,
"rewards/frontier_coverage_10": 0.11428727954626083,
"rewards/frontier_coverage_15": 0.11428727954626083,
"rewards/frontier_coverage_20": 0.11371297538280487,
"rewards/frontier_coverage_25": 0.1081900030374527,
"rewards/frontier_coverage_5": 0.11428727954626083,
"rewards/frontier_ece_reward": 0.007919181045144797,
"rewards/frontier_entropy_batch_reward": -0.19063332974910735,
"signal/accuracy_reward/centered_abs_mean": 0.120074462890625,
"signal/accuracy_reward/group_bin_occupancy": 0.18046875,
"signal/accuracy_reward/group_std_mean": 0.15685472190380095,
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0600372314453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0600372314453125,
"signal/advantage_abs_mean": 0.07497897297143936,
"signal/advantage_pre_scale_abs_mean": 0.07497897297143936,
"signal/advantage_pre_scale_std": 0.11704835444688796,
"signal/advantage_std": 0.11704835444688796,
"signal/brier_reward/centered_abs_mean": 0.13743520379066468,
"signal/brier_reward/group_bin_occupancy": 0.838671875,
"signal/brier_reward/group_std_mean": 0.17770840525627135,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013743520341813564,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013743520341813564,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01321981344372034,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93359375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018044329062104226,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001321981381624937,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001321981381624937,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031342420261353254,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70390625,
"signal/frontier_aurc_reward/group_std_mean": 0.005184091906994581,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.917802387150004e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.917802387150004e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.18790066838264466,
"signal/frontier_coverage_0/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_0/group_std_mean": 0.24077147245407104,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_1/centered_abs_mean": 0.18790066838264466,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_1/group_std_mean": 0.24077147245407104,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_10/centered_abs_mean": 0.18790066838264466,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_10/group_std_mean": 0.24077147245407104,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_15/centered_abs_mean": 0.18790066838264466,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_15/group_std_mean": 0.24077147245407104,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_20/centered_abs_mean": 0.18661079108715056,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_20/group_std_mean": 0.23916022181510926,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002332634944468737,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002332634944468737,
"signal/frontier_coverage_25/centered_abs_mean": 0.17039817869663237,
"signal/frontier_coverage_25/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_25/group_std_mean": 0.21902235150337218,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021299772663041948,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021299772663041948,
"signal/frontier_coverage_5/centered_abs_mean": 0.18790066838264466,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_5/group_std_mean": 0.24077147245407104,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023487584665417673,
"signal/frontier_ece_reward/centered_abs_mean": 0.014475966058671474,
"signal/frontier_ece_reward/group_bin_occupancy": 0.822265625,
"signal/frontier_ece_reward/group_std_mean": 0.022006630897521973,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014475966105237602,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014475966105237602,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26103139519691465,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.746484375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3364805102348328,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02610314004123211,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02610314004123211,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.4928207956337317,
"eval_calibration/batch_distribution_entropy": 0.9448460828266618,
"eval_calibration/batch_entropy_100bins": 0.7140585686804589,
"eval_calibration/batch_entropy_10bins": 0.9448460828266618,
"eval_calibration/batch_entropy_50bins": 0.7938259760149764,
"eval_calibration/batch_uniqueness": 0.9052734375,
"eval_calibration/buffer_distribution_entropy": 0.9977152222728605,
"eval_calibration/buffer_entropy_100bins": 0.9880421883831635,
"eval_calibration/buffer_entropy_10bins": 0.9977152222728605,
"eval_calibration/buffer_entropy_50bins": 0.9941686668937713,
"eval_calibration/confidence_entropy": 0.4849995680216249,
"eval_calibration/coverage@0%": 0.046875,
"eval_calibration/coverage@1%": 0.046875,
"eval_calibration/coverage@10%": 0.046875,
"eval_calibration/coverage@15%": 0.046875,
"eval_calibration/coverage@20%": 0.09375,
"eval_calibration/coverage@25%": 0.1484375,
"eval_calibration/coverage@30%": 0.1484375,
"eval_calibration/coverage@5%": 0.046875,
"eval_calibration/ece": 0.22999326971199952,
"eval_calibration/mean_confidence": 0.4614743513558307,
"eval_calibration/prompt_uniqueness": 0.9052734375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 434.5,
"eval_completions/max_terminated_length": 434.5,
"eval_completions/mean_length": 194.3330421447754,
"eval_completions/mean_terminated_length": 194.3330421447754,
"eval_completions/min_length": 98.0,
"eval_completions/min_terminated_length": 98.0,
"eval_loss": 0.0,
"eval_num_tokens": 500009384.0,
"eval_reward": 0.7936547994613647,
"eval_reward_std": 0.2236923649907112,
"eval_rewards/accuracy_reward": 0.416015625,
"eval_rewards/brier_reward": 0.7854552268981934,
"eval_rewards/confidence_uniqueness_reward": 0.904296875,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0038128122105263174,
"eval_rewards/frontier_coverage_0": 0.18677300214767456,
"eval_rewards/frontier_coverage_1": 0.18677300214767456,
"eval_rewards/frontier_coverage_10": 0.18677300214767456,
"eval_rewards/frontier_coverage_15": 0.18677300214767456,
"eval_rewards/frontier_coverage_20": 0.18603158369660378,
"eval_rewards/frontier_coverage_25": 0.1658840924501419,
"eval_rewards/frontier_coverage_5": 0.18677300214767456,
"eval_rewards/frontier_ece_reward": 0.0064718994544819,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 22.8621,
"eval_samples_per_second": 21.87,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4705810546875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49238111078739166,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23529052734375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23529052734375,
"eval_signal/advantage_abs_mean": 0.20876647159457207,
"eval_signal/advantage_pre_scale_abs_mean": 0.20876647159457207,
"eval_signal/advantage_pre_scale_std": 0.22128642722964287,
"eval_signal/advantage_std": 0.22128642722964287,
"eval_signal/brier_reward/centered_abs_mean": 0.1912023350596428,
"eval_signal/brier_reward/group_bin_occupancy": 0.890625,
"eval_signal/brier_reward/group_std_mean": 0.2429308146238327,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019120234064757824,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019120234064757824,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0374603271484375,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.390625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04364745691418648,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003746032773051411,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003746032773051411,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004567834781482816,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6640625,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008484951569698751,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7097938224615064e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7097938224615064e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.35640130937099457,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4366024136543274,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.35640130937099457,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4366024136543274,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.35640130937099457,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4366024136543274,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.35640130937099457,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4366024136543274,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.35413555800914764,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4339291825890541,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044266944751143456,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044266944751143456,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.31332943588495255,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.3852032795548439,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003916618006769568,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003916618006769568,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.35640130937099457,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4366024136543274,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044550164602696896,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013189757708460093,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.90625,
"eval_signal/frontier_ece_reward/group_std_mean": 0.018085308838635683,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013189757883083075,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013189757883083075,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.175,
"step": 150
},
{
"calibration/aurc": 0.3905210632456232,
"calibration/batch_distribution_entropy": 0.9905343495592323,
"calibration/batch_entropy_100bins": 0.9699764400711286,
"calibration/batch_entropy_10bins": 0.9905343495592323,
"calibration/batch_entropy_50bins": 0.97994022779757,
"calibration/batch_uniqueness": 0.9584047876376637,
"calibration/buffer_distribution_entropy": 0.9977835093627524,
"calibration/buffer_entropy_100bins": 0.9880724924738754,
"calibration/buffer_entropy_10bins": 0.9977835093627524,
"calibration/buffer_entropy_50bins": 0.9941551940853858,
"calibration/confidence_entropy": 0.500662742867437,
"calibration/coverage@0%": 0.005078889432485323,
"calibration/coverage@1%": 0.005078889432485323,
"calibration/coverage@10%": 0.08125076443248533,
"calibration/coverage@15%": 0.1417976394324853,
"calibration/coverage@20%": 0.1953132644324853,
"calibration/coverage@25%": 0.2523613319471624,
"calibration/coverage@30%": 0.29494786570450093,
"calibration/coverage@5%": 0.03828201443248532,
"calibration/ece": 0.13589736223677878,
"calibration/mean_confidence": 0.49380390196528123,
"calibration/prompt_uniqueness": 0.8630719648315557,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 1201.8,
"completions/max_terminated_length": 791.8,
"completions/mean_length": 193.1357421875,
"completions/mean_terminated_length": 192.3479034423828,
"completions/min_length": 88.4,
"completions/min_terminated_length": 88.4,
"epoch": 0.496,
"grad_norm": 0.0008443639962933958,
"learning_rate": 1e-06,
"loss": 0.0025,
"num_tokens": 517294934.0,
"reward": 0.9380232334136963,
"reward_std": 0.08917539864778519,
"rewards/accuracy_reward": 0.547265625,
"rewards/brier_reward": 0.7819605112075806,
"rewards/confidence_uniqueness_reward": 0.956698739528656,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.002871061023324728,
"rewards/frontier_coverage_0": 0.09036671817302704,
"rewards/frontier_coverage_1": 0.09036671817302704,
"rewards/frontier_coverage_10": 0.09036671817302704,
"rewards/frontier_coverage_15": 0.09036671817302704,
"rewards/frontier_coverage_20": 0.08979679197072983,
"rewards/frontier_coverage_25": 0.075638347864151,
"rewards/frontier_coverage_5": 0.09036671817302704,
"rewards/frontier_ece_reward": 0.006273471284657717,
"rewards/frontier_entropy_batch_reward": -0.17489843368530272,
"signal/accuracy_reward/centered_abs_mean": 0.0958251953125,
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
"signal/accuracy_reward/group_std_mean": 0.13047634959220886,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04791259765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04791259765625,
"signal/advantage_abs_mean": 0.06811611354351044,
"signal/advantage_pre_scale_abs_mean": 0.06811611354351044,
"signal/advantage_pre_scale_std": 0.10838208794593811,
"signal/advantage_std": 0.10838208794593811,
"signal/brier_reward/centered_abs_mean": 0.13192115724086761,
"signal/brier_reward/group_bin_occupancy": 0.869140625,
"signal/brier_reward/group_std_mean": 0.168493589758873,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013192116282880306,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013192116282880306,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012967484071850777,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9390625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018038667924702167,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001296748430468142,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001296748430468142,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0033145629335194827,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028141734655946493,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.696484375,
"signal/frontier_aurc_reward/group_std_mean": 0.004710181429982185,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.517716831993312e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.517716831993312e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.16962937116622925,
"signal/frontier_coverage_0/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_0/group_std_mean": 0.2177934467792511,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_1/centered_abs_mean": 0.16962937116622925,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_1/group_std_mean": 0.2177934467792511,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_10/centered_abs_mean": 0.16962937116622925,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_10/group_std_mean": 0.2177934467792511,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_15/centered_abs_mean": 0.16962937116622925,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_15/group_std_mean": 0.2177934467792511,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_20/centered_abs_mean": 0.16856757402420045,
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_20/group_std_mean": 0.2164437383413315,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021070946007966996,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021070946007966996,
"signal/frontier_coverage_25/centered_abs_mean": 0.14662111103534697,
"signal/frontier_coverage_25/group_bin_occupancy": 0.876953125,
"signal/frontier_coverage_25/group_std_mean": 0.1890866458415985,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001832763897255063,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001832763897255063,
"signal/frontier_coverage_5/centered_abs_mean": 0.16962937116622925,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_5/group_std_mean": 0.2177934467792511,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002120367041788995,
"signal/frontier_ece_reward/centered_abs_mean": 0.011038328520953655,
"signal/frontier_ece_reward/group_bin_occupancy": 0.86796875,
"signal/frontier_ece_reward/group_std_mean": 0.014819971285760403,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011038328986614943,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011038328986614943,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.256817501783371,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3358631134033203,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025681750476360322,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025681750476360322,
"step": 155
},
{
"calibration/aurc": 0.31790763537690625,
"calibration/batch_distribution_entropy": 0.9855522887040239,
"calibration/batch_entropy_100bins": 0.9685646417212451,
"calibration/batch_entropy_10bins": 0.9855522887040239,
"calibration/batch_entropy_50bins": 0.9776787715207496,
"calibration/batch_uniqueness": 0.9575121754105227,
"calibration/buffer_distribution_entropy": 0.9980551921710852,
"calibration/buffer_entropy_100bins": 0.9885994859100894,
"calibration/buffer_entropy_10bins": 0.9980551921710852,
"calibration/buffer_entropy_50bins": 0.9943812246542099,
"calibration/confidence_entropy": 0.5099366283838993,
"calibration/coverage@0%": 0.023481837084148728,
"calibration/coverage@1%": 0.023481837084148728,
"calibration/coverage@10%": 0.19366820572407045,
"calibration/coverage@15%": 0.3277481347847358,
"calibration/coverage@20%": 0.3950319532778865,
"calibration/coverage@25%": 0.4333506604696673,
"calibration/coverage@30%": 0.4865199975538161,
"calibration/coverage@5%": 0.07631941046966731,
"calibration/ece": 0.137644847278309,
"calibration/mean_confidence": 0.5067424365660879,
"calibration/prompt_uniqueness": 0.870397403014438,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1051.6,
"completions/max_terminated_length": 664.6,
"completions/mean_length": 187.24072265625,
"completions/mean_terminated_length": 186.84423217773437,
"completions/min_length": 82.6,
"completions/min_terminated_length": 82.6,
"epoch": 0.512,
"grad_norm": 0.0013515661703422666,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 534357943.0,
"reward": 0.9389643549919129,
"reward_std": 0.08940067738294602,
"rewards/accuracy_reward": 0.5443359375,
"rewards/brier_reward": 0.8010304689407348,
"rewards/confidence_uniqueness_reward": 0.9571277260780334,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002591008087620139,
"rewards/frontier_coverage_0": 0.11089163199067116,
"rewards/frontier_coverage_1": 0.11089163199067116,
"rewards/frontier_coverage_10": 0.11089163199067116,
"rewards/frontier_coverage_15": 0.11089163199067116,
"rewards/frontier_coverage_20": 0.11040212810039521,
"rewards/frontier_coverage_25": 0.0975722998380661,
"rewards/frontier_coverage_5": 0.11089163199067116,
"rewards/frontier_ece_reward": 0.007399659510701895,
"rewards/frontier_entropy_batch_reward": -0.19110932052135468,
"signal/accuracy_reward/centered_abs_mean": 0.09991455078125,
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
"signal/accuracy_reward/group_std_mean": 0.13368143737316132,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049957275390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049957275390625,
"signal/advantage_abs_mean": 0.0693469613790512,
"signal/advantage_pre_scale_abs_mean": 0.0693469613790512,
"signal/advantage_pre_scale_std": 0.10971838235855103,
"signal/advantage_std": 0.10971838235855103,
"signal/brier_reward/centered_abs_mean": 0.12356914579868317,
"signal/brier_reward/group_bin_occupancy": 0.84921875,
"signal/brier_reward/group_std_mean": 0.16091051399707795,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012356914579868317,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012356914579868317,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011988498829305172,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.946484375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015966850332915783,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011988498736172915,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011988498736172915,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028497665654867886,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875,
"signal/frontier_aurc_reward/group_std_mean": 0.004724315833300352,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.562208294169977e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.562208294169977e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1604818731546402,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_0/group_std_mean": 0.21088581383228303,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_1/centered_abs_mean": 0.1604818731546402,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_1/group_std_mean": 0.21088581383228303,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_10/centered_abs_mean": 0.1604818731546402,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_10/group_std_mean": 0.21088581383228303,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_15/centered_abs_mean": 0.1604818731546402,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_15/group_std_mean": 0.21088581383228303,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_20/centered_abs_mean": 0.15974161326885222,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_20/group_std_mean": 0.2099863260984421,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00199677012860775,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00199677012860775,
"signal/frontier_coverage_25/centered_abs_mean": 0.1347096398472786,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8546875,
"signal/frontier_coverage_25/group_std_mean": 0.17761588990688323,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016838705167174339,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016838705167174339,
"signal/frontier_coverage_5/centered_abs_mean": 0.1604818731546402,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_5/group_std_mean": 0.21088581383228303,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020060235168784858,
"signal/frontier_ece_reward/centered_abs_mean": 0.010506413504481315,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89140625,
"signal/frontier_ece_reward/group_std_mean": 0.01373392753303051,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010506413877010346,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010506413877010346,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2652657926082611,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.342242556810379,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02652658075094223,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02652658075094223,
"step": 160
},
{
"calibration/aurc": 0.2116557679486677,
"calibration/batch_distribution_entropy": 0.9857721529055761,
"calibration/batch_entropy_100bins": 0.9656648820603996,
"calibration/batch_entropy_10bins": 0.9857721529055761,
"calibration/batch_entropy_50bins": 0.9778444274441169,
"calibration/batch_uniqueness": 0.9589366543250588,
"calibration/buffer_distribution_entropy": 0.9982783670040604,
"calibration/buffer_entropy_100bins": 0.9890758203614105,
"calibration/buffer_entropy_10bins": 0.9982783670040604,
"calibration/buffer_entropy_50bins": 0.9945921630733402,
"calibration/confidence_entropy": 0.47936185596729713,
"calibration/coverage@0%": 0.03830418297455969,
"calibration/coverage@1%": 0.03830418297455969,
"calibration/coverage@10%": 0.27363090141878665,
"calibration/coverage@15%": 0.34475752201565557,
"calibration/coverage@20%": 0.5264394263698631,
"calibration/coverage@25%": 0.616313753669276,
"calibration/coverage@30%": 0.7604887781311154,
"calibration/coverage@5%": 0.17943141511741684,
"calibration/ece": 0.11594575333235538,
"calibration/mean_confidence": 0.5187620094786958,
"calibration/prompt_uniqueness": 0.8553659111602497,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1092.6,
"completions/max_terminated_length": 778.2,
"completions/mean_length": 187.25498046875,
"completions/mean_terminated_length": 186.8594207763672,
"completions/min_length": 85.4,
"completions/min_terminated_length": 85.4,
"epoch": 0.528,
"grad_norm": 0.0011601398000493646,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 551304970.0,
"reward": 0.9393844962120056,
"reward_std": 0.08786453604698181,
"rewards/accuracy_reward": 0.54228515625,
"rewards/brier_reward": 0.8001392245292663,
"rewards/confidence_uniqueness_reward": 0.9570415496826172,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0026703955605626105,
"rewards/frontier_coverage_0": 0.12262420728802681,
"rewards/frontier_coverage_1": 0.12262420728802681,
"rewards/frontier_coverage_10": 0.12262420728802681,
"rewards/frontier_coverage_15": 0.12262420728802681,
"rewards/frontier_coverage_20": 0.12122518271207809,
"rewards/frontier_coverage_25": 0.10408189445734024,
"rewards/frontier_coverage_5": 0.12262420728802681,
"rewards/frontier_ece_reward": 0.008721418399363755,
"rewards/frontier_entropy_batch_reward": -0.18648791313171387,
"signal/accuracy_reward/centered_abs_mean": 0.104351806640625,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.1387265920639038,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521759033203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0521759033203125,
"signal/advantage_abs_mean": 0.06842133551836013,
"signal/advantage_pre_scale_abs_mean": 0.06842133551836013,
"signal/advantage_pre_scale_std": 0.1080582544207573,
"signal/advantage_std": 0.1080582544207573,
"signal/brier_reward/centered_abs_mean": 0.12543393820524215,
"signal/brier_reward/group_bin_occupancy": 0.84765625,
"signal/brier_reward/group_std_mean": 0.16112754940986634,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012543394230306149,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012543394230306149,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011928396113216878,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.95234375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01579369381070137,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011928396532312035,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011928396532312035,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027430617716163396,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125,
"signal/frontier_aurc_reward/group_std_mean": 0.004478739900514483,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4288274036953226e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4288274036953226e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17456578612327575,
"signal/frontier_coverage_0/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_0/group_std_mean": 0.22599020898342131,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_1/centered_abs_mean": 0.17456578612327575,
"signal/frontier_coverage_1/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_1/group_std_mean": 0.22599020898342131,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_10/centered_abs_mean": 0.17456578612327575,
"signal/frontier_coverage_10/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_10/group_std_mean": 0.22599020898342131,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_15/centered_abs_mean": 0.17456578612327575,
"signal/frontier_coverage_15/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_15/group_std_mean": 0.22599020898342131,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_20/centered_abs_mean": 0.17225814163684844,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8625,
"signal/frontier_coverage_20/group_std_mean": 0.22308208048343658,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021532268263399603,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021532268263399603,
"signal/frontier_coverage_25/centered_abs_mean": 0.139526429772377,
"signal/frontier_coverage_25/group_bin_occupancy": 0.85390625,
"signal/frontier_coverage_25/group_std_mean": 0.1816743493080139,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017440804746001958,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017440804746001958,
"signal/frontier_coverage_5/centered_abs_mean": 0.17456578612327575,
"signal/frontier_coverage_5/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_5/group_std_mean": 0.22599020898342131,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021820723544806243,
"signal/frontier_ece_reward/centered_abs_mean": 0.013731100969016552,
"signal/frontier_ece_reward/group_bin_occupancy": 0.84921875,
"signal/frontier_ece_reward/group_std_mean": 0.022244062460958957,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013731101527810097,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013731101527810097,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25302750468254087,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3287863492965698,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02530275024473667,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02530275024473667,
"step": 165
},
{
"calibration/aurc": 0.22570365505708892,
"calibration/batch_distribution_entropy": 0.9843955404941772,
"calibration/batch_entropy_100bins": 0.9662716763828755,
"calibration/batch_entropy_10bins": 0.9843955404941772,
"calibration/batch_entropy_50bins": 0.9777331174175243,
"calibration/batch_uniqueness": 0.959429931640625,
"calibration/buffer_distribution_entropy": 0.9982544609568785,
"calibration/buffer_entropy_100bins": 0.9893260796188164,
"calibration/buffer_entropy_10bins": 0.9982544609568785,
"calibration/buffer_entropy_50bins": 0.9945346311546374,
"calibration/confidence_entropy": 0.4794441883496332,
"calibration/coverage@0%": 0.021875,
"calibration/coverage@1%": 0.05,
"calibration/coverage@10%": 0.196875,
"calibration/coverage@15%": 0.2703125,
"calibration/coverage@20%": 0.538671875,
"calibration/coverage@25%": 0.646484375,
"calibration/coverage@30%": 0.73046875,
"calibration/coverage@5%": 0.11171875,
"calibration/ece": 0.0953630593548969,
"calibration/mean_confidence": 0.5133342115984929,
"calibration/prompt_uniqueness": 0.8658203125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 717.8,
"completions/mean_length": 187.49580078125,
"completions/mean_terminated_length": 186.83703002929687,
"completions/min_length": 81.2,
"completions/min_terminated_length": 81.2,
"epoch": 0.544,
"grad_norm": 0.0014953252393752337,
"learning_rate": 1e-06,
"loss": 0.0019,
"num_tokens": 568388511.0,
"reward": 0.9478225350379944,
"reward_std": 0.09547350853681565,
"rewards/accuracy_reward": 0.5732421875,
"rewards/brier_reward": 0.7815747022628784,
"rewards/confidence_uniqueness_reward": 0.957908034324646,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0027830671519041062,
"rewards/frontier_coverage_0": 0.07353707253932953,
"rewards/frontier_coverage_1": 0.07353707253932953,
"rewards/frontier_coverage_10": 0.07353707253932953,
"rewards/frontier_coverage_15": 0.07353707253932953,
"rewards/frontier_coverage_20": 0.07318145632743836,
"rewards/frontier_coverage_25": 0.058940806239843366,
"rewards/frontier_coverage_5": 0.07353707253932953,
"rewards/frontier_ece_reward": 0.0069223855622112754,
"rewards/frontier_entropy_batch_reward": -0.19407747387886048,
"signal/accuracy_reward/centered_abs_mean": 0.1157470703125,
"signal/accuracy_reward/group_bin_occupancy": 0.182421875,
"signal/accuracy_reward/group_std_mean": 0.15591520071029663,
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05787353515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05787353515625,
"signal/advantage_abs_mean": 0.07254650890827179,
"signal/advantage_pre_scale_abs_mean": 0.07254650890827179,
"signal/advantage_pre_scale_std": 0.11336593627929688,
"signal/advantage_std": 0.11336593627929688,
"signal/brier_reward/centered_abs_mean": 0.13599575757980348,
"signal/brier_reward/group_bin_occupancy": 0.855859375,
"signal/brier_reward/group_std_mean": 0.1739354431629181,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01359957605600357,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01359957605600357,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012918978370726109,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.931640625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017537206411361694,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012918978696689009,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012918978696689009,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028809635899960996,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.698046875,
"signal/frontier_aurc_reward/group_std_mean": 0.004857636988162994,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.601204407459591e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.601204407459591e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1849408507347107,
"signal/frontier_coverage_0/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_0/group_std_mean": 0.2365315616130829,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_1/centered_abs_mean": 0.1849408507347107,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_1/group_std_mean": 0.2365315616130829,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_10/centered_abs_mean": 0.1849408507347107,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_10/group_std_mean": 0.2365315616130829,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_15/centered_abs_mean": 0.1849408507347107,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_15/group_std_mean": 0.2365315616130829,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_20/centered_abs_mean": 0.18298504054546355,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_20/group_std_mean": 0.2340652674436569,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022873131558299063,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022873131558299063,
"signal/frontier_coverage_25/centered_abs_mean": 0.13958741277456282,
"signal/frontier_coverage_25/group_bin_occupancy": 0.859375,
"signal/frontier_coverage_25/group_std_mean": 0.1793098896741867,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017448426457121967,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017448426457121967,
"signal/frontier_coverage_5/centered_abs_mean": 0.1849408507347107,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_5/group_std_mean": 0.2365315616130829,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002311760699376464,
"signal/frontier_ece_reward/centered_abs_mean": 0.012716376781463623,
"signal/frontier_ece_reward/group_bin_occupancy": 0.83515625,
"signal/frontier_ece_reward/group_std_mean": 0.021388059109449388,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012716377153992654,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012716377153992654,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2714007079601288,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34507684111595155,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027140070497989655,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027140070497989655,
"step": 170
},
{
"calibration/aurc": 0.2630419824140453,
"calibration/batch_distribution_entropy": 0.9855633289125907,
"calibration/batch_entropy_100bins": 0.9657104346168568,
"calibration/batch_entropy_10bins": 0.9855633289125907,
"calibration/batch_entropy_50bins": 0.9780025735821166,
"calibration/batch_uniqueness": 0.9603877337790866,
"calibration/buffer_distribution_entropy": 0.9981646752348159,
"calibration/buffer_entropy_100bins": 0.9893824626518759,
"calibration/buffer_entropy_10bins": 0.9981646752348159,
"calibration/buffer_entropy_50bins": 0.9944247124897367,
"calibration/confidence_entropy": 0.49413471824349064,
"calibration/coverage@0%": 0.011331182729941292,
"calibration/coverage@1%": 0.0863311827299413,
"calibration/coverage@10%": 0.2082061827299413,
"calibration/coverage@15%": 0.24805222602739727,
"calibration/coverage@20%": 0.36997767857142855,
"calibration/coverage@25%": 0.49003791585127204,
"calibration/coverage@30%": 0.6072965080724071,
"calibration/coverage@5%": 0.1652374327299413,
"calibration/ece": 0.13711708315742724,
"calibration/mean_confidence": 0.48495450226497266,
"calibration/prompt_uniqueness": 0.8678187089457596,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 782.2,
"completions/max_terminated_length": 617.6,
"completions/mean_length": 185.22158203125,
"completions/mean_terminated_length": 185.09028015136718,
"completions/min_length": 81.4,
"completions/min_terminated_length": 81.4,
"epoch": 0.56,
"grad_norm": 0.0008502820273861289,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 585106588.0,
"reward": 0.9321273326873779,
"reward_std": 0.08039158433675767,
"rewards/accuracy_reward": 0.53154296875,
"rewards/brier_reward": 0.8001036405563354,
"rewards/confidence_uniqueness_reward": 0.9595265865325928,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002705985214561224,
"rewards/frontier_coverage_0": 0.11961866915225983,
"rewards/frontier_coverage_1": 0.11961866915225983,
"rewards/frontier_coverage_10": 0.11961866915225983,
"rewards/frontier_coverage_15": 0.11961866915225983,
"rewards/frontier_coverage_20": 0.1191520243883133,
"rewards/frontier_coverage_25": 0.08837539106607437,
"rewards/frontier_coverage_5": 0.11961866915225983,
"rewards/frontier_ece_reward": 0.006058618426322937,
"rewards/frontier_entropy_batch_reward": -0.20200627744197847,
"signal/accuracy_reward/centered_abs_mean": 0.082733154296875,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.11375210285186768,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0413665771484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0413665771484375,
"signal/advantage_abs_mean": 0.06228437945246697,
"signal/advantage_pre_scale_abs_mean": 0.06228437945246697,
"signal/advantage_pre_scale_std": 0.09934655725955963,
"signal/advantage_std": 0.09934655725955963,
"signal/brier_reward/centered_abs_mean": 0.12185298353433609,
"signal/brier_reward/group_bin_occupancy": 0.84296875,
"signal/brier_reward/group_std_mean": 0.15725724995136262,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012185298651456834,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012185298651456834,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01168802659958601,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.014986979961395263,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011688026832416653,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011688026832416653,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027294772677123546,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.692578125,
"signal/frontier_aurc_reward/group_std_mean": 0.004910151939839125,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.411846555536613e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.411846555536613e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.16406928300857543,
"signal/frontier_coverage_0/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_0/group_std_mean": 0.2110010415315628,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_1/centered_abs_mean": 0.16406928300857543,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_1/group_std_mean": 0.2110010415315628,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_10/centered_abs_mean": 0.16406928300857543,
"signal/frontier_coverage_10/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_10/group_std_mean": 0.2110010415315628,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_15/centered_abs_mean": 0.16406928300857543,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_15/group_std_mean": 0.2110010415315628,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_20/centered_abs_mean": 0.16182146072387696,
"signal/frontier_coverage_20/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_20/group_std_mean": 0.2081581711769104,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020227682311087848,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020227682311087848,
"signal/frontier_coverage_25/centered_abs_mean": 0.1146465077996254,
"signal/frontier_coverage_25/group_bin_occupancy": 0.871875,
"signal/frontier_coverage_25/group_std_mean": 0.14809595346450805,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014330813428387046,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014330813428387046,
"signal/frontier_coverage_5/centered_abs_mean": 0.16406928300857543,
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_5/group_std_mean": 0.2110010415315628,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020508660934865476,
"signal/frontier_ece_reward/centered_abs_mean": 0.009023293852806091,
"signal/frontier_ece_reward/group_bin_occupancy": 0.87109375,
"signal/frontier_ece_reward/group_std_mean": 0.012150265648961068,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009023294202052057,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009023294202052057,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2724481761455536,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3477316856384277,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027244817838072775,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027244817838072775,
"step": 175
},
{
"calibration/aurc": 0.309636001445996,
"calibration/batch_distribution_entropy": 0.9839093996134368,
"calibration/batch_entropy_100bins": 0.9625499427091299,
"calibration/batch_entropy_10bins": 0.9839093996134368,
"calibration/batch_entropy_50bins": 0.972969015575497,
"calibration/batch_uniqueness": 0.960882568359375,
"calibration/buffer_distribution_entropy": 0.998281991860735,
"calibration/buffer_entropy_100bins": 0.9894724328721299,
"calibration/buffer_entropy_10bins": 0.998281991860735,
"calibration/buffer_entropy_50bins": 0.9944521850113789,
"calibration/confidence_entropy": 0.49251360413414175,
"calibration/coverage@0%": 0.010546875,
"calibration/coverage@1%": 0.010546875,
"calibration/coverage@10%": 0.137890625,
"calibration/coverage@15%": 0.208984375,
"calibration/coverage@20%": 0.284765625,
"calibration/coverage@25%": 0.343359375,
"calibration/coverage@30%": 0.46015625,
"calibration/coverage@5%": 0.0421875,
"calibration/ece": 0.08670626321827299,
"calibration/mean_confidence": 0.489370553214073,
"calibration/prompt_uniqueness": 0.857275390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 735.4,
"completions/max_terminated_length": 592.0,
"completions/mean_length": 181.85361328125,
"completions/mean_terminated_length": 181.58960876464843,
"completions/min_length": 81.6,
"completions/min_terminated_length": 81.6,
"epoch": 0.576,
"grad_norm": 0.0010482225334271789,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 602155393.0,
"reward": 0.9271166443824768,
"reward_std": 0.07998622953891754,
"rewards/accuracy_reward": 0.52412109375,
"rewards/brier_reward": 0.7888967275619507,
"rewards/confidence_uniqueness_reward": 0.9595050811767578,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003116795467212796,
"rewards/frontier_coverage_0": 0.1158630833029747,
"rewards/frontier_coverage_1": 0.1158630833029747,
"rewards/frontier_coverage_10": 0.1158630833029747,
"rewards/frontier_coverage_15": 0.1158630833029747,
"rewards/frontier_coverage_20": 0.11500565633177758,
"rewards/frontier_coverage_25": 0.08373434320092202,
"rewards/frontier_coverage_5": 0.1158630833029747,
"rewards/frontier_ece_reward": 0.005350236594676971,
"rewards/frontier_entropy_batch_reward": -0.1985933691263199,
"signal/accuracy_reward/centered_abs_mean": 0.077752685546875,
"signal/accuracy_reward/group_bin_occupancy": 0.166796875,
"signal/accuracy_reward/group_std_mean": 0.1094050019979477,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0388763427734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0388763427734375,
"signal/advantage_abs_mean": 0.06033368557691574,
"signal/advantage_pre_scale_abs_mean": 0.06033368557691574,
"signal/advantage_pre_scale_std": 0.09716939330101013,
"signal/advantage_std": 0.09716939330101013,
"signal/brier_reward/centered_abs_mean": 0.12069027125835419,
"signal/brier_reward/group_bin_occupancy": 0.85546875,
"signal/brier_reward/group_std_mean": 0.1551447778940201,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012069026939570904,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012069026939570904,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01292349398136139,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016793293692171574,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001292349398136139,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001292349398136139,
"signal/format_reward/centered_abs_mean": 0.000555419921875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0013209730386734009,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028868647757917644,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7015625,
"signal/frontier_aurc_reward/group_std_mean": 0.005015233065932989,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.608580991567578e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.608580991567578e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.16046448349952697,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8828125,
"signal/frontier_coverage_0/group_std_mean": 0.20543249547481537,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_1/centered_abs_mean": 0.16046448349952697,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8828125,
"signal/frontier_coverage_1/group_std_mean": 0.20543249547481537,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_10/centered_abs_mean": 0.16046448349952697,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8828125,
"signal/frontier_coverage_10/group_std_mean": 0.20543249547481537,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_15/centered_abs_mean": 0.16046448349952697,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8828125,
"signal/frontier_coverage_15/group_std_mean": 0.20543249547481537,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_20/centered_abs_mean": 0.15846198201179504,
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_20/group_std_mean": 0.20285292565822602,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019807748030871153,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019807748030871153,
"signal/frontier_coverage_25/centered_abs_mean": 0.10855960100889206,
"signal/frontier_coverage_25/group_bin_occupancy": 0.87578125,
"signal/frontier_coverage_25/group_std_mean": 0.13992275893688202,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001356995035894215,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001356995035894215,
"signal/frontier_coverage_5/centered_abs_mean": 0.16046448349952697,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8828125,
"signal/frontier_coverage_5/group_std_mean": 0.20543249547481537,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002005806053057313,
"signal/frontier_ece_reward/centered_abs_mean": 0.009020330384373665,
"signal/frontier_ece_reward/group_bin_occupancy": 0.846875,
"signal/frontier_ece_reward/group_std_mean": 0.012636875361204147,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009020330267958343,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009020330267958343,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2665239542722702,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34042556285858155,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026652396842837333,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026652396842837333,
"step": 180
},
{
"calibration/aurc": 0.29726541114295457,
"calibration/batch_distribution_entropy": 0.9834822954947106,
"calibration/batch_entropy_100bins": 0.9608169793074197,
"calibration/batch_entropy_10bins": 0.9834822954947106,
"calibration/batch_entropy_50bins": 0.9750636920076771,
"calibration/batch_uniqueness": 0.9607329022889346,
"calibration/buffer_distribution_entropy": 0.9983789319326808,
"calibration/buffer_entropy_100bins": 0.9895114798338899,
"calibration/buffer_entropy_10bins": 0.9983789319326808,
"calibration/buffer_entropy_50bins": 0.9945117700392121,
"calibration/confidence_entropy": 0.48444024753987314,
"calibration/coverage@0%": 0.01800085616438356,
"calibration/coverage@1%": 0.01800085616438356,
"calibration/coverage@10%": 0.16278666218199608,
"calibration/coverage@15%": 0.30757323263209396,
"calibration/coverage@20%": 0.4346991193737769,
"calibration/coverage@25%": 0.5273888515166341,
"calibration/coverage@30%": 0.5982035836594912,
"calibration/coverage@5%": 0.08451565557729941,
"calibration/ece": 0.12100732825692273,
"calibration/mean_confidence": 0.49416566136722384,
"calibration/prompt_uniqueness": 0.8536409725383713,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1118.0,
"completions/max_terminated_length": 492.2,
"completions/mean_length": 176.89560546875,
"completions/mean_terminated_length": 176.4981475830078,
"completions/min_length": 79.8,
"completions/min_terminated_length": 79.8,
"epoch": 0.592,
"grad_norm": 0.0010737936245277524,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 619134516.0,
"reward": 0.9240444660186767,
"reward_std": 0.0810801163315773,
"rewards/accuracy_reward": 0.5212890625,
"rewards/brier_reward": 0.790893018245697,
"rewards/confidence_uniqueness_reward": 0.9601579666137695,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.00303261773660779,
"rewards/frontier_coverage_0": 0.12167765200138092,
"rewards/frontier_coverage_1": 0.12167765200138092,
"rewards/frontier_coverage_10": 0.12167765200138092,
"rewards/frontier_coverage_15": 0.12167765200138092,
"rewards/frontier_coverage_20": 0.1199147269129753,
"rewards/frontier_coverage_25": 0.08222576975822449,
"rewards/frontier_coverage_5": 0.12167765200138092,
"rewards/frontier_ece_reward": 0.0055978668853640555,
"rewards/frontier_entropy_batch_reward": -0.22212174534797668,
"signal/accuracy_reward/centered_abs_mean": 0.08709716796875,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.11671981066465378,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043548583984375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.043548583984375,
"signal/advantage_abs_mean": 0.0631883479654789,
"signal/advantage_pre_scale_abs_mean": 0.0631883479654789,
"signal/advantage_pre_scale_std": 0.09987544417381286,
"signal/advantage_std": 0.09987544417381286,
"signal/brier_reward/centered_abs_mean": 0.11597198843955994,
"signal/brier_reward/group_bin_occupancy": 0.838671875,
"signal/brier_reward/group_std_mean": 0.149802365899086,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011597198992967605,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011597198992967605,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012930301018059254,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.911328125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017115654610097408,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012930301018059254,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012930301018059254,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002846223535016179,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.709765625,
"signal/frontier_aurc_reward/group_std_mean": 0.004566754633560777,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.557779564289376e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.557779564289376e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1627124637365341,
"signal/frontier_coverage_0/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_0/group_std_mean": 0.20693700313568114,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_1/centered_abs_mean": 0.1627124637365341,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_1/group_std_mean": 0.20693700313568114,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_10/centered_abs_mean": 0.1627124637365341,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_10/group_std_mean": 0.20693700313568114,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_15/centered_abs_mean": 0.1627124637365341,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_15/group_std_mean": 0.20693700313568114,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_20/centered_abs_mean": 0.15905381739139557,
"signal/frontier_coverage_20/group_bin_occupancy": 0.867578125,
"signal/frontier_coverage_20/group_std_mean": 0.20235534608364106,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001988172740675509,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001988172740675509,
"signal/frontier_coverage_25/centered_abs_mean": 0.10259814411401749,
"signal/frontier_coverage_25/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_25/group_std_mean": 0.1315797194838524,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012824768433347344,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012824768433347344,
"signal/frontier_coverage_5/centered_abs_mean": 0.1627124637365341,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_5/group_std_mean": 0.20693700313568114,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020339058246463537,
"signal/frontier_ece_reward/centered_abs_mean": 0.009119224734604359,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8375,
"signal/frontier_ece_reward/group_std_mean": 0.012649010121822356,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009119224967435002,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009119224967435002,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2906370997428894,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.723828125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3651686549186707,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029063709452748297,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029063709452748297,
"step": 185
},
{
"calibration/aurc": 0.23078012934450687,
"calibration/batch_distribution_entropy": 0.972169633516365,
"calibration/batch_entropy_100bins": 0.9577764655227113,
"calibration/batch_entropy_10bins": 0.972169633516365,
"calibration/batch_entropy_50bins": 0.9698115325872496,
"calibration/batch_uniqueness": 0.958111572265625,
"calibration/buffer_distribution_entropy": 0.9983615562842496,
"calibration/buffer_entropy_100bins": 0.989554537741407,
"calibration/buffer_entropy_10bins": 0.9983615562842496,
"calibration/buffer_entropy_50bins": 0.9944703301690406,
"calibration/confidence_entropy": 0.4702111359575749,
"calibration/coverage@0%": 0.074609375,
"calibration/coverage@1%": 0.0796875,
"calibration/coverage@10%": 0.249609375,
"calibration/coverage@15%": 0.390234375,
"calibration/coverage@20%": 0.530078125,
"calibration/coverage@25%": 0.60859375,
"calibration/coverage@30%": 0.70234375,
"calibration/coverage@5%": 0.15078125,
"calibration/ece": 0.11253724402326422,
"calibration/mean_confidence": 0.4741178147270788,
"calibration/prompt_uniqueness": 0.855712890625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 956.2,
"completions/max_terminated_length": 574.2,
"completions/mean_length": 183.07431640625,
"completions/mean_terminated_length": 182.80919494628907,
"completions/min_length": 82.8,
"completions/min_terminated_length": 82.8,
"epoch": 0.608,
"grad_norm": 0.0008162545855157077,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 636008685.0,
"reward": 0.932918655872345,
"reward_std": 0.07714778482913971,
"rewards/accuracy_reward": 0.5296875,
"rewards/brier_reward": 0.809378182888031,
"rewards/confidence_uniqueness_reward": 0.959358549118042,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002335884002968669,
"rewards/frontier_coverage_0": 0.13527322113513945,
"rewards/frontier_coverage_1": 0.13527322113513945,
"rewards/frontier_coverage_10": 0.13527322113513945,
"rewards/frontier_coverage_15": 0.13527322113513945,
"rewards/frontier_coverage_20": 0.12897036075592042,
"rewards/frontier_coverage_25": 0.08629466593265533,
"rewards/frontier_coverage_5": 0.13527322113513945,
"rewards/frontier_ece_reward": 0.006201074831187725,
"rewards/frontier_entropy_batch_reward": -0.20437394380569457,
"signal/accuracy_reward/centered_abs_mean": 0.08509521484375,
"signal/accuracy_reward/group_bin_occupancy": 0.166796875,
"signal/accuracy_reward/group_std_mean": 0.1144148737192154,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042547607421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042547607421875,
"signal/advantage_abs_mean": 0.05944623276591301,
"signal/advantage_pre_scale_abs_mean": 0.05944623276591301,
"signal/advantage_pre_scale_std": 0.09432210624217988,
"signal/advantage_std": 0.09432210624217988,
"signal/brier_reward/centered_abs_mean": 0.11613385826349258,
"signal/brier_reward/group_bin_occupancy": 0.843359375,
"signal/brier_reward/group_std_mean": 0.14919577836990355,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011613386496901513,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011613386496901513,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012426980212330819,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9140625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0162442235276103,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012426980305463077,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012426980305463077,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002181270159780979,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625,
"signal/frontier_aurc_reward/group_std_mean": 0.003462765412405133,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.726587808865588e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.726587808865588e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17015551626682282,
"signal/frontier_coverage_0/group_bin_occupancy": 0.86015625,
"signal/frontier_coverage_0/group_std_mean": 0.2166207551956177,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_1/centered_abs_mean": 0.17015551626682282,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86015625,
"signal/frontier_coverage_1/group_std_mean": 0.2166207551956177,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_10/centered_abs_mean": 0.17015551626682282,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86015625,
"signal/frontier_coverage_10/group_std_mean": 0.2166207551956177,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_15/centered_abs_mean": 0.17015551626682282,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86015625,
"signal/frontier_coverage_15/group_std_mean": 0.2166207551956177,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_20/centered_abs_mean": 0.16215289533138275,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8578125,
"signal/frontier_coverage_20/group_std_mean": 0.20657850205898284,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020269112894311546,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020269112894311546,
"signal/frontier_coverage_25/centered_abs_mean": 0.09889246076345444,
"signal/frontier_coverage_25/group_bin_occupancy": 0.878515625,
"signal/frontier_coverage_25/group_std_mean": 0.1259763240814209,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012361557688564061,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012361557688564061,
"signal/frontier_coverage_5/centered_abs_mean": 0.17015551626682282,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86015625,
"signal/frontier_coverage_5/group_std_mean": 0.2166207551956177,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002126943925395608,
"signal/frontier_ece_reward/centered_abs_mean": 0.008855049218982457,
"signal/frontier_ece_reward/group_bin_occupancy": 0.825,
"signal/frontier_ece_reward/group_std_mean": 0.012453357130289078,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008855049381963909,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008855049381963909,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2709381639957428,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7328125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3473371982574463,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02709381692111492,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02709381692111492,
"step": 190
},
{
"calibration/aurc": 0.24190303576000538,
"calibration/batch_distribution_entropy": 0.9851080109571406,
"calibration/batch_entropy_100bins": 0.9660194285586039,
"calibration/batch_entropy_10bins": 0.9851080109571406,
"calibration/batch_entropy_50bins": 0.9779150487479604,
"calibration/batch_uniqueness": 0.9610351488052915,
"calibration/buffer_distribution_entropy": 0.9984440017454219,
"calibration/buffer_entropy_100bins": 0.98965536789939,
"calibration/buffer_entropy_10bins": 0.9984440017454219,
"calibration/buffer_entropy_50bins": 0.9944964147279853,
"calibration/confidence_entropy": 0.4965098505390208,
"calibration/coverage@0%": 0.01917196673189824,
"calibration/coverage@1%": 0.01917196673189824,
"calibration/coverage@10%": 0.12628424657534248,
"calibration/coverage@15%": 0.30368685787671235,
"calibration/coverage@20%": 0.44866530088062617,
"calibration/coverage@25%": 0.5823018590998043,
"calibration/coverage@30%": 0.6850691046966733,
"calibration/coverage@5%": 0.048483365949119374,
"calibration/ece": 0.08340996381299517,
"calibration/mean_confidence": 0.516178190966678,
"calibration/prompt_uniqueness": 0.8672639686036681,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 779.6,
"completions/max_terminated_length": 570.2,
"completions/mean_length": 184.97431640625,
"completions/mean_terminated_length": 184.84255981445312,
"completions/min_length": 85.0,
"completions/min_terminated_length": 85.0,
"epoch": 0.624,
"grad_norm": 0.0011064645368605852,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 653246726.0,
"reward": 0.9357229709625244,
"reward_std": 0.08368170112371445,
"rewards/accuracy_reward": 0.5328125,
"rewards/brier_reward": 0.8055280208587646,
"rewards/confidence_uniqueness_reward": 0.9595144271850586,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0028843384236097334,
"rewards/frontier_coverage_0": 0.12474274337291717,
"rewards/frontier_coverage_1": 0.12474274337291717,
"rewards/frontier_coverage_10": 0.12474274337291717,
"rewards/frontier_coverage_15": 0.12449503540992737,
"rewards/frontier_coverage_20": 0.11553706079721451,
"rewards/frontier_coverage_25": 0.07678574174642563,
"rewards/frontier_coverage_5": 0.12474274337291717,
"rewards/frontier_ece_reward": 0.005106198182329535,
"rewards/frontier_entropy_batch_reward": -0.1781061351299286,
"signal/accuracy_reward/centered_abs_mean": 0.09542236328125,
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
"signal/accuracy_reward/group_std_mean": 0.127110655605793,
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047711181640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047711181640625,
"signal/advantage_abs_mean": 0.06504265516996384,
"signal/advantage_pre_scale_abs_mean": 0.06504265516996384,
"signal/advantage_pre_scale_std": 0.10494562834501267,
"signal/advantage_std": 0.10494562834501267,
"signal/brier_reward/centered_abs_mean": 0.11675633937120437,
"signal/brier_reward/group_bin_occupancy": 0.855859375,
"signal/brier_reward/group_std_mean": 0.1501062899827957,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011675634235143662,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011675634235143662,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011868251860141754,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.928125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015202015824615955,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011868252186104655,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011868252186104655,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027540235314518213,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70546875,
"signal/frontier_aurc_reward/group_std_mean": 0.004497009515762329,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.442529414314777e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.442529414314777e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.15991105139255524,
"signal/frontier_coverage_0/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_0/group_std_mean": 0.2048025608062744,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019988882122561336,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019988882122561336,
"signal/frontier_coverage_1/centered_abs_mean": 0.15991105139255524,
"signal/frontier_coverage_1/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_1/group_std_mean": 0.2048025608062744,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019988882122561336,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019988882122561336,
"signal/frontier_coverage_10/centered_abs_mean": 0.15991105139255524,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_10/group_std_mean": 0.2048025608062744,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019988882122561336,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019988882122561336,
"signal/frontier_coverage_15/centered_abs_mean": 0.15969133675098418,
"signal/frontier_coverage_15/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_15/group_std_mean": 0.20454807877540587,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001996141788549721,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001996141788549721,
"signal/frontier_coverage_20/centered_abs_mean": 0.1426139533519745,
"signal/frontier_coverage_20/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_20/group_std_mean": 0.18301699459552764,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00178267452865839,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00178267452865839,
"signal/frontier_coverage_25/centered_abs_mean": 0.08272561132907867,
"signal/frontier_coverage_25/group_bin_occupancy": 0.899609375,
"signal/frontier_coverage_25/group_std_mean": 0.10734816044569015,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001034070155583322,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001034070155583322,
"signal/frontier_coverage_5/centered_abs_mean": 0.15991105139255524,
"signal/frontier_coverage_5/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_5/group_std_mean": 0.2048025608062744,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019988882122561336,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019988882122561336,
"signal/frontier_ece_reward/centered_abs_mean": 0.00812565665692091,
"signal/frontier_ece_reward/group_bin_occupancy": 0.83671875,
"signal/frontier_ece_reward/group_std_mean": 0.011584336683154107,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008125656750053167,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008125656750053167,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2498374253511429,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3267929255962372,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024983742833137514,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024983742833137514,
"step": 195
},
{
"calibration/aurc": 0.26512359032820576,
"calibration/batch_distribution_entropy": 0.9819760604956709,
"calibration/batch_entropy_100bins": 0.961525383497633,
"calibration/batch_entropy_10bins": 0.9819760604956709,
"calibration/batch_entropy_50bins": 0.9735273980284143,
"calibration/batch_uniqueness": 0.9592022574135403,
"calibration/buffer_distribution_entropy": 0.9985324674567476,
"calibration/buffer_entropy_100bins": 0.9898235209789318,
"calibration/buffer_entropy_10bins": 0.9985324674567476,
"calibration/buffer_entropy_50bins": 0.9945705588322357,
"calibration/confidence_entropy": 0.5043373180268479,
"calibration/coverage@0%": 0.06848550636007827,
"calibration/coverage@1%": 0.07318217954990215,
"calibration/coverage@10%": 0.3003011863992172,
"calibration/coverage@15%": 0.35930925880626224,
"calibration/coverage@20%": 0.3980078889432485,
"calibration/coverage@25%": 0.4449081152152642,
"calibration/coverage@30%": 0.6000076443248532,
"calibration/coverage@5%": 0.23814135885518595,
"calibration/ece": 0.16721492077336086,
"calibration/mean_confidence": 0.5371362845790759,
"calibration/prompt_uniqueness": 0.8633109228668054,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0009765625,
"completions/max_length": 1131.8,
"completions/max_terminated_length": 729.0,
"completions/mean_length": 188.35107421875,
"completions/mean_terminated_length": 187.03569641113282,
"completions/min_length": 82.4,
"completions/min_terminated_length": 82.4,
"epoch": 0.64,
"grad_norm": 0.0009473967947997153,
"learning_rate": 1e-06,
"loss": 0.002,
"num_tokens": 670518129.0,
"reward": 0.9468509316444397,
"reward_std": 0.07844078540802002,
"rewards/accuracy_reward": 0.5681640625,
"rewards/brier_reward": 0.8030801296234131,
"rewards/confidence_uniqueness_reward": 0.957956874370575,
"rewards/format_reward": 0.99892578125,
"rewards/frontier_aurc_reward": -0.0027081962209194897,
"rewards/frontier_coverage_0": 0.09533136114478111,
"rewards/frontier_coverage_1": 0.09533136114478111,
"rewards/frontier_coverage_10": 0.09533136114478111,
"rewards/frontier_coverage_15": 0.0953597754240036,
"rewards/frontier_coverage_20": 0.08704339265823365,
"rewards/frontier_coverage_25": 0.05847667083144188,
"rewards/frontier_coverage_5": 0.09533136114478111,
"rewards/frontier_ece_reward": 0.005162352602928877,
"rewards/frontier_entropy_batch_reward": -0.21057653427124023,
"signal/accuracy_reward/centered_abs_mean": 0.07152099609375,
"signal/accuracy_reward/group_bin_occupancy": 0.162109375,
"signal/accuracy_reward/group_std_mean": 0.09872582405805588,
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035760498046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035760498046875,
"signal/advantage_abs_mean": 0.0600838340818882,
"signal/advantage_pre_scale_abs_mean": 0.0600838340818882,
"signal/advantage_pre_scale_std": 0.09821470826864243,
"signal/advantage_std": 0.09821470826864243,
"signal/brier_reward/centered_abs_mean": 0.11101796627044677,
"signal/brier_reward/group_bin_occupancy": 0.848828125,
"signal/brier_reward/group_std_mean": 0.14420543015003204,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011101796850562095,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011101796850562095,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013335288688540458,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01753148380666971,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013335288735106588,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013335288735106588,
"signal/format_reward/centered_abs_mean": 0.001739501953125,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0030320982448756697,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008697509765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008697509765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027640830259770153,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.705859375,
"signal/frontier_aurc_reward/group_std_mean": 0.004512441391125321,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.455103724263609e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.455103724263609e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.139675572514534,
"signal/frontier_coverage_0/group_bin_occupancy": 0.875,
"signal/frontier_coverage_0/group_std_mean": 0.18167279958724974,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017459447728469968,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017459447728469968,
"signal/frontier_coverage_1/centered_abs_mean": 0.139675572514534,
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
"signal/frontier_coverage_1/group_std_mean": 0.18167279958724974,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017459447728469968,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017459447728469968,
"signal/frontier_coverage_10/centered_abs_mean": 0.139675572514534,
"signal/frontier_coverage_10/group_bin_occupancy": 0.875,
"signal/frontier_coverage_10/group_std_mean": 0.18167279958724974,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017459447728469968,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017459447728469968,
"signal/frontier_coverage_15/centered_abs_mean": 0.1394558221101761,
"signal/frontier_coverage_15/group_bin_occupancy": 0.875390625,
"signal/frontier_coverage_15/group_std_mean": 0.18138521909713745,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017431978834792972,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017431978834792972,
"signal/frontier_coverage_20/centered_abs_mean": 0.1208455815911293,
"signal/frontier_coverage_20/group_bin_occupancy": 0.865234375,
"signal/frontier_coverage_20/group_std_mean": 0.15725071132183074,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015105698024854065,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015105698024854065,
"signal/frontier_coverage_25/centered_abs_mean": 0.07018533274531365,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8984375,
"signal/frontier_coverage_25/group_std_mean": 0.09095044732093811,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008773167035542428,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008773167035542428,
"signal/frontier_coverage_5/centered_abs_mean": 0.139675572514534,
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
"signal/frontier_coverage_5/group_std_mean": 0.18167279958724974,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017459447728469968,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017459447728469968,
"signal/frontier_ece_reward/centered_abs_mean": 0.008315538614988327,
"signal/frontier_ece_reward/group_bin_occupancy": 0.822265625,
"signal/frontier_ece_reward/group_std_mean": 0.01194094903767109,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008315538754686714,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008315538754686714,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27706546187400816,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35140617489814757,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02770654745399952,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02770654745399952,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.4566159075334175,
"eval_calibration/batch_distribution_entropy": 0.9163126557855614,
"eval_calibration/batch_entropy_100bins": 0.7178744913412122,
"eval_calibration/batch_entropy_10bins": 0.9163126557855614,
"eval_calibration/batch_entropy_50bins": 0.798684377132812,
"eval_calibration/batch_uniqueness": 0.904296875,
"eval_calibration/buffer_distribution_entropy": 0.9985671709970653,
"eval_calibration/buffer_entropy_100bins": 0.9900184255261969,
"eval_calibration/buffer_entropy_10bins": 0.9985671709970653,
"eval_calibration/buffer_entropy_50bins": 0.9946421013154033,
"eval_calibration/confidence_entropy": 0.5005098902291641,
"eval_calibration/coverage@0%": 0.0390625,
"eval_calibration/coverage@1%": 0.0390625,
"eval_calibration/coverage@10%": 0.0390625,
"eval_calibration/coverage@15%": 0.09375,
"eval_calibration/coverage@20%": 0.1875,
"eval_calibration/coverage@25%": 0.2109375,
"eval_calibration/coverage@30%": 0.25,
"eval_calibration/coverage@5%": 0.0390625,
"eval_calibration/ece": 0.19819265669162003,
"eval_calibration/mean_confidence": 0.44646902260571164,
"eval_calibration/prompt_uniqueness": 0.904296875,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 373.25,
"eval_completions/max_terminated_length": 373.25,
"eval_completions/mean_length": 191.53293228149414,
"eval_completions/mean_terminated_length": 191.53293228149414,
"eval_completions/min_length": 95.5,
"eval_completions/min_terminated_length": 95.5,
"eval_loss": 0.0,
"eval_num_tokens": 670518129.0,
"eval_reward": 0.799683153629303,
"eval_reward_std": 0.22493423148989677,
"eval_rewards/accuracy_reward": 0.4296875,
"eval_rewards/brier_reward": 0.7988216429948807,
"eval_rewards/confidence_uniqueness_reward": 0.89794921875,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0034247017465531826,
"eval_rewards/frontier_coverage_0": 0.18689577654004097,
"eval_rewards/frontier_coverage_1": 0.18689577654004097,
"eval_rewards/frontier_coverage_10": 0.18689577654004097,
"eval_rewards/frontier_coverage_15": 0.18632838502526283,
"eval_rewards/frontier_coverage_20": 0.1586691550910473,
"eval_rewards/frontier_coverage_25": 0.08706778101623058,
"eval_rewards/frontier_coverage_5": 0.18689577654004097,
"eval_rewards/frontier_ece_reward": 0.004595339996740222,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 19.8251,
"eval_samples_per_second": 25.22,
"eval_signal/accuracy_reward/centered_abs_mean": 0.47509765625,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49481892585754395,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.237548828125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.237548828125,
"eval_signal/advantage_abs_mean": 0.21163957193493843,
"eval_signal/advantage_pre_scale_abs_mean": 0.21163957193493843,
"eval_signal/advantage_pre_scale_std": 0.2224200740456581,
"eval_signal/advantage_std": 0.2224200740456581,
"eval_signal/brier_reward/centered_abs_mean": 0.18079102784395218,
"eval_signal/brier_reward/group_bin_occupancy": 0.8828125,
"eval_signal/brier_reward/group_std_mean": 0.2304544784128666,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018079102504998446,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018079102504998446,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0389862060546875,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40625,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.046543585136532784,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003898620721884072,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003898620721884072,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004244803451001644,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6015625,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008210767526179552,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.306004641170148e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.306004641170148e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.36475419253110886,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4384455382823944,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004559427383355796,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004559427383355796,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36475419253110886,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4384455382823944,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004559427383355796,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004559427383355796,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.36475419253110886,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4384455382823944,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004559427383355796,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004559427383355796,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3635733351111412,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_15/group_std_mean": 0.43705061078071594,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004544666619040072,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004544666619040072,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3101271614432335,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.984375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.37565645575523376,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038765897625125945,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038765897625125945,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.15158939361572266,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.19350523501634598,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018948675133287907,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018948675133287907,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36475419253110886,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4384455382823944,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004559427383355796,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004559427383355796,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.00890616630204022,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.890625,
"eval_signal/frontier_ece_reward/group_std_mean": 0.013190251076593995,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008906166476663202,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008906166476663202,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.202,
"step": 200
},
{
"calibration/aurc": 0.41371530585067084,
"calibration/batch_distribution_entropy": 0.968526477144453,
"calibration/batch_entropy_100bins": 0.9606235974216354,
"calibration/batch_entropy_10bins": 0.968526477144453,
"calibration/batch_entropy_50bins": 0.9681260324091415,
"calibration/batch_uniqueness": 0.9544647216796875,
"calibration/buffer_distribution_entropy": 0.9986671560208302,
"calibration/buffer_entropy_100bins": 0.9902706960336737,
"calibration/buffer_entropy_10bins": 0.9986671560208302,
"calibration/buffer_entropy_50bins": 0.9947728709798141,
"calibration/confidence_entropy": 0.5252373594473655,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.0140625,
"calibration/coverage@15%": 0.06171875,
"calibration/coverage@20%": 0.1109375,
"calibration/coverage@25%": 0.179296875,
"calibration/coverage@30%": 0.28984375,
"calibration/coverage@5%": 0.00546875,
"calibration/ece": 0.10538980298983983,
"calibration/mean_confidence": 0.4486731517153184,
"calibration/prompt_uniqueness": 0.8537109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 558.0,
"completions/max_terminated_length": 558.0,
"completions/mean_length": 188.62841796875,
"completions/mean_terminated_length": 188.62841796875,
"completions/min_length": 86.2,
"completions/min_terminated_length": 86.2,
"epoch": 0.656,
"grad_norm": 0.0009614454465918243,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 687306228.0,
"reward": 0.9128621459007263,
"reward_std": 0.08489621281623841,
"rewards/accuracy_reward": 0.49951171875,
"rewards/brier_reward": 0.7791517615318299,
"rewards/confidence_uniqueness_reward": 0.9559079051017761,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.00292632021009922,
"rewards/frontier_coverage_0": 0.11452654302120209,
"rewards/frontier_coverage_1": 0.11452654302120209,
"rewards/frontier_coverage_10": 0.11452654302120209,
"rewards/frontier_coverage_15": 0.11425123661756516,
"rewards/frontier_coverage_20": 0.0982695385813713,
"rewards/frontier_coverage_25": 0.05823923796415329,
"rewards/frontier_coverage_5": 0.11452654302120209,
"rewards/frontier_ece_reward": 0.0032087708823382854,
"rewards/frontier_entropy_batch_reward": -0.1974597692489624,
"signal/accuracy_reward/centered_abs_mean": 0.094403076171875,
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
"signal/accuracy_reward/group_std_mean": 0.12853155434131622,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0472015380859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0472015380859375,
"signal/advantage_abs_mean": 0.0651530534029007,
"signal/advantage_pre_scale_abs_mean": 0.0651530534029007,
"signal/advantage_pre_scale_std": 0.1036272794008255,
"signal/advantage_std": 0.1036272794008255,
"signal/brier_reward/centered_abs_mean": 0.12340695858001709,
"signal/brier_reward/group_bin_occupancy": 0.872265625,
"signal/brier_reward/group_std_mean": 0.15771982073783875,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012340695783495902,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012340695783495902,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012716875597834586,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9515625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016173630580306055,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012716875644400716,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012716875644400716,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025363420136272907,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7109375,
"signal/frontier_aurc_reward/group_std_mean": 0.004503958486020565,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.170427517034114e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.170427517034114e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1702498823404312,
"signal/frontier_coverage_0/group_bin_occupancy": 0.887109375,
"signal/frontier_coverage_0/group_std_mean": 0.2185587167739868,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021281236317008735,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021281236317008735,
"signal/frontier_coverage_1/centered_abs_mean": 0.1702498823404312,
"signal/frontier_coverage_1/group_bin_occupancy": 0.887109375,
"signal/frontier_coverage_1/group_std_mean": 0.2185587167739868,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021281236317008735,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021281236317008735,
"signal/frontier_coverage_10/centered_abs_mean": 0.1702498823404312,
"signal/frontier_coverage_10/group_bin_occupancy": 0.887109375,
"signal/frontier_coverage_10/group_std_mean": 0.2185587167739868,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021281236317008735,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021281236317008735,
"signal/frontier_coverage_15/centered_abs_mean": 0.1695919394493103,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88671875,
"signal/frontier_coverage_15/group_std_mean": 0.21770275235176087,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021198994014412164,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021198994014412164,
"signal/frontier_coverage_20/centered_abs_mean": 0.1464843899011612,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_20/group_std_mean": 0.18815037310123445,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018310548504814506,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018310548504814506,
"signal/frontier_coverage_25/centered_abs_mean": 0.0794641137123108,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9,
"signal/frontier_coverage_25/group_std_mean": 0.10283097177743912,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009933014633134007,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009933014633134007,
"signal/frontier_coverage_5/centered_abs_mean": 0.1702498823404312,
"signal/frontier_coverage_5/group_bin_occupancy": 0.887109375,
"signal/frontier_coverage_5/group_std_mean": 0.2185587167739868,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021281236317008735,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021281236317008735,
"signal/frontier_ece_reward/centered_abs_mean": 0.006669469363987446,
"signal/frontier_ece_reward/group_bin_occupancy": 0.82109375,
"signal/frontier_ece_reward/group_std_mean": 0.009653137251734733,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006669469643384218,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006669469643384218,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26615132987499235,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.739453125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3430874884128571,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026615133881568907,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026615133881568907,
"step": 205
},
{
"calibration/aurc": 0.2996998322733083,
"calibration/batch_distribution_entropy": 0.9712911308832048,
"calibration/batch_entropy_100bins": 0.9581911832477289,
"calibration/batch_entropy_10bins": 0.9712911308832048,
"calibration/batch_entropy_50bins": 0.9694983488466955,
"calibration/batch_uniqueness": 0.9568363156374307,
"calibration/buffer_distribution_entropy": 0.9989196857105181,
"calibration/buffer_entropy_100bins": 0.9907594188460136,
"calibration/buffer_entropy_10bins": 0.9989196857105181,
"calibration/buffer_entropy_50bins": 0.9950373549425547,
"calibration/confidence_entropy": 0.49656147261144473,
"calibration/coverage@0%": 0.01328125,
"calibration/coverage@1%": 0.01328125,
"calibration/coverage@10%": 0.12422639432485323,
"calibration/coverage@15%": 0.16836701932485323,
"calibration/coverage@20%": 0.2230545193248532,
"calibration/coverage@25%": 0.3297280149217221,
"calibration/coverage@30%": 0.4953736545988258,
"calibration/coverage@5%": 0.031640625,
"calibration/ece": 0.13407407064959234,
"calibration/mean_confidence": 0.4797791452804888,
"calibration/prompt_uniqueness": 0.8583812638202394,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 828.0,
"completions/max_terminated_length": 688.2,
"completions/mean_length": 188.770703125,
"completions/mean_terminated_length": 188.6397918701172,
"completions/min_length": 87.4,
"completions/min_terminated_length": 87.4,
"epoch": 0.672,
"grad_norm": 0.0009502097382210195,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 704152680.0,
"reward": 0.9265612006187439,
"reward_std": 0.07872170060873032,
"rewards/accuracy_reward": 0.5279296875,
"rewards/brier_reward": 0.7858775019645691,
"rewards/confidence_uniqueness_reward": 0.9564463257789612,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.00274044550023973,
"rewards/frontier_coverage_0": 0.11836416125297547,
"rewards/frontier_coverage_1": 0.11836416125297547,
"rewards/frontier_coverage_10": 0.11836416125297547,
"rewards/frontier_coverage_15": 0.11800117641687394,
"rewards/frontier_coverage_20": 0.10758722573518753,
"rewards/frontier_coverage_25": 0.0648583009839058,
"rewards/frontier_coverage_5": 0.11836416125297547,
"rewards/frontier_ece_reward": 0.003610279364511371,
"rewards/frontier_entropy_batch_reward": -0.21462770104408263,
"signal/accuracy_reward/centered_abs_mean": 0.08948974609375,
"signal/accuracy_reward/group_bin_occupancy": 0.167578125,
"signal/accuracy_reward/group_std_mean": 0.11885513663291931,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044744873046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044744873046875,
"signal/advantage_abs_mean": 0.060820522159337996,
"signal/advantage_pre_scale_abs_mean": 0.060820522159337996,
"signal/advantage_pre_scale_std": 0.09661759734153748,
"signal/advantage_std": 0.09661759734153748,
"signal/brier_reward/centered_abs_mean": 0.1223609670996666,
"signal/brier_reward/group_bin_occupancy": 0.8421875,
"signal/brier_reward/group_std_mean": 0.15609249770641326,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012236096523702144,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012236096523702144,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013496090844273567,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.926953125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01729346551001072,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013496090890839697,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013496090890839697,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024223918560892345,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.705859375,
"signal/frontier_aurc_reward/group_std_mean": 0.004061613464727998,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.027989914698992e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.027989914698992e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17780146598815919,
"signal/frontier_coverage_0/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_0/group_std_mean": 0.2251005709171295,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002222518343478441,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002222518343478441,
"signal/frontier_coverage_1/centered_abs_mean": 0.17780146598815919,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_1/group_std_mean": 0.2251005709171295,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002222518343478441,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002222518343478441,
"signal/frontier_coverage_10/centered_abs_mean": 0.17780146598815919,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_10/group_std_mean": 0.2251005709171295,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002222518343478441,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002222518343478441,
"signal/frontier_coverage_15/centered_abs_mean": 0.17655244171619416,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_15/group_std_mean": 0.2235410749912262,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022069055587053297,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022069055587053297,
"signal/frontier_coverage_20/centered_abs_mean": 0.15346194803714752,
"signal/frontier_coverage_20/group_bin_occupancy": 0.85859375,
"signal/frontier_coverage_20/group_std_mean": 0.19473823606967927,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019182743271812797,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019182743271812797,
"signal/frontier_coverage_25/centered_abs_mean": 0.08100719451904297,
"signal/frontier_coverage_25/group_bin_occupancy": 0.896875,
"signal/frontier_coverage_25/group_std_mean": 0.10340845137834549,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010125899803824722,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010125899803824722,
"signal/frontier_coverage_5/centered_abs_mean": 0.17780146598815919,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_5/group_std_mean": 0.2251005709171295,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002222518343478441,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002222518343478441,
"signal/frontier_ece_reward/centered_abs_mean": 0.007257478311657906,
"signal/frontier_ece_reward/group_bin_occupancy": 0.83125,
"signal/frontier_ece_reward/group_std_mean": 0.010310792177915574,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007257478660903871,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007257478660903871,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2733268320560455,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.721484375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34998972415924073,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027332685142755508,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027332685142755508,
"step": 210
},
{
"calibration/aurc": 0.334584462013137,
"calibration/batch_distribution_entropy": 0.981975898164209,
"calibration/batch_entropy_100bins": 0.9626993539936158,
"calibration/batch_entropy_10bins": 0.981975898164209,
"calibration/batch_entropy_50bins": 0.9758907463855657,
"calibration/batch_uniqueness": 0.9594284057909366,
"calibration/buffer_distribution_entropy": 0.9989722834263842,
"calibration/buffer_entropy_100bins": 0.9911441184383936,
"calibration/buffer_entropy_10bins": 0.9989722834263842,
"calibration/buffer_entropy_50bins": 0.9952248695727167,
"calibration/confidence_entropy": 0.49724886655592815,
"calibration/coverage@0%": 0.0042976394324853225,
"calibration/coverage@1%": 0.0042976394324853225,
"calibration/coverage@10%": 0.041407014432485324,
"calibration/coverage@15%": 0.13792196673189822,
"calibration/coverage@20%": 0.3537006176614481,
"calibration/coverage@25%": 0.41976363747553813,
"calibration/coverage@30%": 0.6041638637475538,
"calibration/coverage@5%": 0.019532014432485322,
"calibration/ece": 0.13218861654477354,
"calibration/mean_confidence": 0.5038023206802205,
"calibration/prompt_uniqueness": 0.863943656103668,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1064.4,
"completions/max_terminated_length": 782.0,
"completions/mean_length": 191.0904296875,
"completions/mean_terminated_length": 190.82761840820314,
"completions/min_length": 85.8,
"completions/min_terminated_length": 85.8,
"epoch": 0.688,
"grad_norm": 0.0010152794420719147,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 721063366.0,
"reward": 0.9351613402366639,
"reward_std": 0.08508041054010392,
"rewards/accuracy_reward": 0.54443359375,
"rewards/brier_reward": 0.7874362349510193,
"rewards/confidence_uniqueness_reward": 0.9589925885200501,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003119149315170944,
"rewards/frontier_coverage_0": 0.10003266781568527,
"rewards/frontier_coverage_1": 0.10003266781568527,
"rewards/frontier_coverage_10": 0.10003266781568527,
"rewards/frontier_coverage_15": 0.09940593391656875,
"rewards/frontier_coverage_20": 0.0850291058421135,
"rewards/frontier_coverage_25": 0.048729277402162555,
"rewards/frontier_coverage_5": 0.10003266781568527,
"rewards/frontier_ece_reward": 0.004082085704430938,
"rewards/frontier_entropy_batch_reward": -0.198372682929039,
"signal/accuracy_reward/centered_abs_mean": 0.099102783203125,
"signal/accuracy_reward/group_bin_occupancy": 0.1734375,
"signal/accuracy_reward/group_std_mean": 0.13263332694768906,
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495513916015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0495513916015625,
"signal/advantage_abs_mean": 0.06450984179973603,
"signal/advantage_pre_scale_abs_mean": 0.06450984179973603,
"signal/advantage_pre_scale_std": 0.10448751300573349,
"signal/advantage_std": 0.10448751300573349,
"signal/brier_reward/centered_abs_mean": 0.12233641296625138,
"signal/brier_reward/group_bin_occupancy": 0.85078125,
"signal/brier_reward/group_std_mean": 0.15672328174114228,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012233641929924488,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.012233641929924488,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012578487582504749,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9265625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016628415510058402,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012578487861901523,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012578487861901523,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002936544781550765,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7078125,
"signal/frontier_aurc_reward/group_std_mean": 0.004950050543993711,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.670681326184422e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.670681326184422e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.16857316195964814,
"signal/frontier_coverage_0/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_0/group_std_mean": 0.21436219811439514,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021071645431220533,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021071645431220533,
"signal/frontier_coverage_1/centered_abs_mean": 0.16857316195964814,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_1/group_std_mean": 0.21436219811439514,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021071645431220533,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021071645431220533,
"signal/frontier_coverage_10/centered_abs_mean": 0.16857316195964814,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_10/group_std_mean": 0.21436219811439514,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021071645431220533,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021071645431220533,
"signal/frontier_coverage_15/centered_abs_mean": 0.16726841926574706,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_15/group_std_mean": 0.21273342669010162,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00209085529204458,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00209085529204458,
"signal/frontier_coverage_20/centered_abs_mean": 0.14143361896276474,
"signal/frontier_coverage_20/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_20/group_std_mean": 0.1803019016981125,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017679202603176237,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017679202603176237,
"signal/frontier_coverage_25/centered_abs_mean": 0.07234455198049546,
"signal/frontier_coverage_25/group_bin_occupancy": 0.90390625,
"signal/frontier_coverage_25/group_std_mean": 0.09286017566919327,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009043069556355476,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009043069556355476,
"signal/frontier_coverage_5/centered_abs_mean": 0.16857316195964814,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_5/group_std_mean": 0.21436219811439514,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021071645431220533,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021071645431220533,
"signal/frontier_ece_reward/centered_abs_mean": 0.007802222948521375,
"signal/frontier_ece_reward/group_bin_occupancy": 0.825390625,
"signal/frontier_ece_reward/group_std_mean": 0.011409426480531693,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007802223321050405,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007802223321050405,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2717812657356262,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730859375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34891357421875,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02717812769114971,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02717812769114971,
"step": 215
},
{
"calibration/aurc": 0.2431222558267499,
"calibration/batch_distribution_entropy": 0.9704915481336872,
"calibration/batch_entropy_100bins": 0.95218267752457,
"calibration/batch_entropy_10bins": 0.9704915481336872,
"calibration/batch_entropy_50bins": 0.9658659766924742,
"calibration/batch_uniqueness": 0.9607759885101235,
"calibration/buffer_distribution_entropy": 0.9988664602008029,
"calibration/buffer_entropy_100bins": 0.9911952395475305,
"calibration/buffer_entropy_10bins": 0.9988664602008029,
"calibration/buffer_entropy_50bins": 0.9952006753696396,
"calibration/confidence_entropy": 0.4901460074658397,
"calibration/coverage@0%": 0.016022504892367905,
"calibration/coverage@1%": 0.016022504892367905,
"calibration/coverage@10%": 0.1179756298923679,
"calibration/coverage@15%": 0.22891848091976516,
"calibration/coverage@20%": 0.37267841854207434,
"calibration/coverage@25%": 0.5801140533268102,
"calibration/coverage@30%": 0.6719300391389432,
"calibration/coverage@5%": 0.048053754892367906,
"calibration/ece": 0.11020522398245616,
"calibration/mean_confidence": 0.5436369682894794,
"calibration/prompt_uniqueness": 0.8631135779786681,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 753.2,
"completions/max_terminated_length": 586.6,
"completions/mean_length": 189.27822265625,
"completions/mean_terminated_length": 189.1458312988281,
"completions/min_length": 83.6,
"completions/min_terminated_length": 83.6,
"epoch": 0.704,
"grad_norm": 0.0008683862979523838,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 737867719.0,
"reward": 0.9372278213500976,
"reward_std": 0.08068549633026123,
"rewards/accuracy_reward": 0.54736328125,
"rewards/brier_reward": 0.7968594074249268,
"rewards/confidence_uniqueness_reward": 0.9608587741851806,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.003032087814062834,
"rewards/frontier_coverage_0": 0.09889980629086495,
"rewards/frontier_coverage_1": 0.09889980629086495,
"rewards/frontier_coverage_10": 0.09889980629086495,
"rewards/frontier_coverage_15": 0.09856819957494736,
"rewards/frontier_coverage_20": 0.08530885577201844,
"rewards/frontier_coverage_25": 0.051339687407016756,
"rewards/frontier_coverage_5": 0.09889980629086495,
"rewards/frontier_ece_reward": 0.004148419946432114,
"rewards/frontier_entropy_batch_reward": -0.20438967049121856,
"signal/accuracy_reward/centered_abs_mean": 0.083721923828125,
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
"signal/accuracy_reward/group_std_mean": 0.11213247925043106,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0418609619140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0418609619140625,
"signal/advantage_abs_mean": 0.06281042322516442,
"signal/advantage_pre_scale_abs_mean": 0.06281042322516442,
"signal/advantage_pre_scale_std": 0.10061680972576141,
"signal/advantage_std": 0.10061680972576141,
"signal/brier_reward/centered_abs_mean": 0.11816587895154954,
"signal/brier_reward/group_bin_occupancy": 0.853125,
"signal/brier_reward/group_std_mean": 0.1513270229101181,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01181658823043108,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01181658823043108,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011747047305107117,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9234375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015102297998964787,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001174704753793776,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001174704753793776,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002884101867675781,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71328125,
"signal/frontier_aurc_reward/group_std_mean": 0.004772100504487753,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6051273491466417e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6051273491466417e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.15133111774921418,
"signal/frontier_coverage_0/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_0/group_std_mean": 0.1936686307191849,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018916390370577573,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018916390370577573,
"signal/frontier_coverage_1/centered_abs_mean": 0.15133111774921418,
"signal/frontier_coverage_1/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_1/group_std_mean": 0.1936686307191849,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018916390370577573,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018916390370577573,
"signal/frontier_coverage_10/centered_abs_mean": 0.15133111774921418,
"signal/frontier_coverage_10/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_10/group_std_mean": 0.1936686307191849,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018916390370577573,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018916390370577573,
"signal/frontier_coverage_15/centered_abs_mean": 0.15005215704441072,
"signal/frontier_coverage_15/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_15/group_std_mean": 0.19203002452850343,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018756520003080368,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018756520003080368,
"signal/frontier_coverage_20/centered_abs_mean": 0.1224544808268547,
"signal/frontier_coverage_20/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_20/group_std_mean": 0.15708767175674437,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015306809917092323,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015306809917092323,
"signal/frontier_coverage_25/centered_abs_mean": 0.06395273804664611,
"signal/frontier_coverage_25/group_bin_occupancy": 0.912890625,
"signal/frontier_coverage_25/group_std_mean": 0.08253951072692871,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007994092302396894,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007994092302396894,
"signal/frontier_coverage_5/centered_abs_mean": 0.15133111774921418,
"signal/frontier_coverage_5/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_5/group_std_mean": 0.1936686307191849,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018916390370577573,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018916390370577573,
"signal/frontier_ece_reward/centered_abs_mean": 0.007144089136272669,
"signal/frontier_ece_reward/group_bin_occupancy": 0.826171875,
"signal/frontier_ece_reward/group_std_mean": 0.010504491440951825,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007144089206121862,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007144089206121862,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.265256404876709,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.339794796705246,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02652563974261284,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02652563974261284,
"step": 220
},
{
"calibration/aurc": 0.23301790118207893,
"calibration/batch_distribution_entropy": 0.9875498553783582,
"calibration/batch_entropy_100bins": 0.9644342655540867,
"calibration/batch_entropy_10bins": 0.9875498553783582,
"calibration/batch_entropy_50bins": 0.9791999063823041,
"calibration/batch_uniqueness": 0.9621734619140625,
"calibration/buffer_distribution_entropy": 0.998861625242886,
"calibration/buffer_entropy_100bins": 0.9913135794668897,
"calibration/buffer_entropy_10bins": 0.998861625242886,
"calibration/buffer_entropy_50bins": 0.9952531135075292,
"calibration/confidence_entropy": 0.5181585521770237,
"calibration/coverage@0%": 0.0671875,
"calibration/coverage@1%": 0.08359375,
"calibration/coverage@10%": 0.22265625,
"calibration/coverage@15%": 0.281640625,
"calibration/coverage@20%": 0.43515625,
"calibration/coverage@25%": 0.585546875,
"calibration/coverage@30%": 0.692578125,
"calibration/coverage@5%": 0.160546875,
"calibration/ece": 0.131002530713914,
"calibration/mean_confidence": 0.5249371858462698,
"calibration/prompt_uniqueness": 0.870068359375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 731.4,
"completions/max_terminated_length": 731.4,
"completions/mean_length": 191.42177734375,
"completions/mean_terminated_length": 191.42177734375,
"completions/min_length": 82.6,
"completions/min_terminated_length": 82.6,
"epoch": 0.72,
"grad_norm": 0.0009743034606799483,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 754837734.0,
"reward": 0.9488389015197753,
"reward_std": 0.08193524926900864,
"rewards/accuracy_reward": 0.57021484375,
"rewards/brier_reward": 0.8064169526100159,
"rewards/confidence_uniqueness_reward": 0.9598495483398437,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0021567588206380605,
"rewards/frontier_coverage_0": 0.08862596154212951,
"rewards/frontier_coverage_1": 0.08862596154212951,
"rewards/frontier_coverage_10": 0.08861215263605118,
"rewards/frontier_coverage_15": 0.0873618446290493,
"rewards/frontier_coverage_20": 0.07677424550056458,
"rewards/frontier_coverage_25": 0.04842212200164795,
"rewards/frontier_coverage_5": 0.08862596154212951,
"rewards/frontier_ece_reward": 0.0034100091550499203,
"rewards/frontier_entropy_batch_reward": -0.20297325849533082,
"signal/accuracy_reward/centered_abs_mean": 0.086773681640625,
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
"signal/accuracy_reward/group_std_mean": 0.12210773676633835,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0433868408203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0433868408203125,
"signal/advantage_abs_mean": 0.061074144393205645,
"signal/advantage_pre_scale_abs_mean": 0.061074144393205645,
"signal/advantage_pre_scale_std": 0.09964745044708252,
"signal/advantage_std": 0.09964745044708252,
"signal/brier_reward/centered_abs_mean": 0.10820089429616928,
"signal/brier_reward/group_bin_occupancy": 0.8640625,
"signal/brier_reward/group_std_mean": 0.1395553916692734,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010820089280605317,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010820089280605317,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012313938140869141,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9390625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015382156148552895,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012313938699662686,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012313938699662686,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019083557184785605,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70859375,
"signal/frontier_aurc_reward/group_std_mean": 0.003126844298094511,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.385444749961607e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.385444749961607e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.15149271190166474,
"signal/frontier_coverage_0/group_bin_occupancy": 0.875,
"signal/frontier_coverage_0/group_std_mean": 0.19624074995517732,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001893658982589841,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001893658982589841,
"signal/frontier_coverage_1/centered_abs_mean": 0.15149271190166474,
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
"signal/frontier_coverage_1/group_std_mean": 0.19624074995517732,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001893658982589841,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001893658982589841,
"signal/frontier_coverage_10/centered_abs_mean": 0.15089576244354247,
"signal/frontier_coverage_10/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_10/group_std_mean": 0.1954701155424118,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018861971329897642,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018861971329897642,
"signal/frontier_coverage_15/centered_abs_mean": 0.14784342050552368,
"signal/frontier_coverage_15/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_15/group_std_mean": 0.19153738617897034,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018480427097529173,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018480427097529173,
"signal/frontier_coverage_20/centered_abs_mean": 0.11549332290887833,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_20/group_std_mean": 0.14993580281734467,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014436665922403335,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014436665922403335,
"signal/frontier_coverage_25/centered_abs_mean": 0.0574177585542202,
"signal/frontier_coverage_25/group_bin_occupancy": 0.91015625,
"signal/frontier_coverage_25/group_std_mean": 0.07405912727117539,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007177219958975911,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007177219958975911,
"signal/frontier_coverage_5/centered_abs_mean": 0.15149271190166474,
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
"signal/frontier_coverage_5/group_std_mean": 0.19624074995517732,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001893658982589841,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001893658982589841,
"signal/frontier_ece_reward/centered_abs_mean": 0.006441084947437048,
"signal/frontier_ece_reward/group_bin_occupancy": 0.840625,
"signal/frontier_ece_reward/group_std_mean": 0.009463933855295181,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006441084784455598,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006441084784455598,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27501477003097535,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3496582627296448,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027501478046178817,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027501478046178817,
"step": 225
},
{
"calibration/aurc": 0.25028817244424917,
"calibration/batch_distribution_entropy": 0.9743689912131274,
"calibration/batch_entropy_100bins": 0.9518182314907483,
"calibration/batch_entropy_10bins": 0.9743689912131274,
"calibration/batch_entropy_50bins": 0.9659027223777581,
"calibration/batch_uniqueness": 0.9613949453726036,
"calibration/buffer_distribution_entropy": 0.9989980442369921,
"calibration/buffer_entropy_100bins": 0.9914707354706082,
"calibration/buffer_entropy_10bins": 0.9989980442369921,
"calibration/buffer_entropy_50bins": 0.9953458202834676,
"calibration/confidence_entropy": 0.4951727113718006,
"calibration/coverage@0%": 0.007818615459882583,
"calibration/coverage@1%": 0.007818615459882583,
"calibration/coverage@10%": 0.11291050024461839,
"calibration/coverage@15%": 0.1961281494618395,
"calibration/coverage@20%": 0.4301194043542075,
"calibration/coverage@25%": 0.5449677409491194,
"calibration/coverage@30%": 0.6873287671232877,
"calibration/coverage@5%": 0.04455112524461839,
"calibration/ece": 0.13390157547439757,
"calibration/mean_confidence": 0.5367679906823735,
"calibration/prompt_uniqueness": 0.8548456197970864,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 970.4,
"completions/max_terminated_length": 763.0,
"completions/mean_length": 190.246484375,
"completions/mean_terminated_length": 190.1147430419922,
"completions/min_length": 85.4,
"completions/min_terminated_length": 85.4,
"epoch": 0.736,
"grad_norm": 0.0006948218797333539,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 771725442.0,
"reward": 0.9490613460540771,
"reward_std": 0.0752700299024582,
"rewards/accuracy_reward": 0.5681640625,
"rewards/brier_reward": 0.8009598612785339,
"rewards/confidence_uniqueness_reward": 0.9616155385971069,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002585971378721297,
"rewards/frontier_coverage_0": 0.09696303457021713,
"rewards/frontier_coverage_1": 0.09696303457021713,
"rewards/frontier_coverage_10": 0.09685205966234207,
"rewards/frontier_coverage_15": 0.09585188180208207,
"rewards/frontier_coverage_20": 0.08253547102212906,
"rewards/frontier_coverage_25": 0.05373050421476364,
"rewards/frontier_coverage_5": 0.09696303457021713,
"rewards/frontier_ece_reward": 0.0037560143042355775,
"rewards/frontier_entropy_batch_reward": -0.19320926070213318,
"signal/accuracy_reward/centered_abs_mean": 0.0760009765625,
"signal/accuracy_reward/group_bin_occupancy": 0.162890625,
"signal/accuracy_reward/group_std_mean": 0.10297959595918656,
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03800048828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03800048828125,
"signal/advantage_abs_mean": 0.058193684369325635,
"signal/advantage_pre_scale_abs_mean": 0.058193684369325635,
"signal/advantage_pre_scale_std": 0.09324042946100235,
"signal/advantage_std": 0.09324042946100235,
"signal/brier_reward/centered_abs_mean": 0.11048106700181962,
"signal/brier_reward/group_bin_occupancy": 0.84609375,
"signal/brier_reward/group_std_mean": 0.14282523095607758,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011048106662929057,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011048106662929057,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011998776532709598,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.912109375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015458272024989127,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011998776812106372,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011998776812106372,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023550010519102216,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375,
"signal/frontier_aurc_reward/group_std_mean": 0.004158449545502663,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9437512421282008e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9437512421282008e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.15034229159355164,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_0/group_std_mean": 0.19372088611125945,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018792787101119756,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018792787101119756,
"signal/frontier_coverage_1/centered_abs_mean": 0.15034229159355164,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_1/group_std_mean": 0.19372088611125945,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018792787101119756,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018792787101119756,
"signal/frontier_coverage_10/centered_abs_mean": 0.1496051698923111,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_10/group_std_mean": 0.1928351491689682,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001870064646936953,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001870064646936953,
"signal/frontier_coverage_15/centered_abs_mean": 0.1457270860671997,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_15/group_std_mean": 0.18802883327007294,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018215886317193507,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018215886317193507,
"signal/frontier_coverage_20/centered_abs_mean": 0.11060539782047271,
"signal/frontier_coverage_20/group_bin_occupancy": 0.860546875,
"signal/frontier_coverage_20/group_std_mean": 0.1432916909456253,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013825674774125218,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013825674774125218,
"signal/frontier_coverage_25/centered_abs_mean": 0.05945408642292023,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9125,
"signal/frontier_coverage_25/group_std_mean": 0.0763387769460678,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007431761012412607,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007431761012412607,
"signal/frontier_coverage_5/centered_abs_mean": 0.15034229159355164,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_5/group_std_mean": 0.19372088611125945,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018792787101119756,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018792787101119756,
"signal/frontier_ece_reward/centered_abs_mean": 0.006753822509199381,
"signal/frontier_ece_reward/group_bin_occupancy": 0.82734375,
"signal/frontier_ece_reward/group_std_mean": 0.010070707648992538,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006753822672180831,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006753822672180831,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26861504912376405,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3452408015727997,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02686150446534157,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02686150446534157,
"step": 230
},
{
"calibration/aurc": 0.256449338221134,
"calibration/batch_distribution_entropy": 0.968129743999475,
"calibration/batch_entropy_100bins": 0.9535105366339899,
"calibration/batch_entropy_10bins": 0.968129743999475,
"calibration/batch_entropy_50bins": 0.9658034715240916,
"calibration/batch_uniqueness": 0.959322589602819,
"calibration/buffer_distribution_entropy": 0.9990122658119412,
"calibration/buffer_entropy_100bins": 0.9914938682880787,
"calibration/buffer_entropy_10bins": 0.9990122658119412,
"calibration/buffer_entropy_50bins": 0.9952962057881454,
"calibration/confidence_entropy": 0.4566113101334025,
"calibration/coverage@0%": 0.014465355919765166,
"calibration/coverage@1%": 0.014465355919765166,
"calibration/coverage@10%": 0.17503516389432486,
"calibration/coverage@15%": 0.3086740154109589,
"calibration/coverage@20%": 0.44046370474559693,
"calibration/coverage@25%": 0.5280225660469667,
"calibration/coverage@30%": 0.6393850905088063,
"calibration/coverage@5%": 0.059005014677103715,
"calibration/ece": 0.13985793070933042,
"calibration/mean_confidence": 0.4696687177806269,
"calibration/prompt_uniqueness": 0.851551728342872,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 949.0,
"completions/max_terminated_length": 544.6,
"completions/mean_length": 188.6611328125,
"completions/mean_terminated_length": 188.26605224609375,
"completions/min_length": 87.2,
"completions/min_terminated_length": 87.2,
"epoch": 0.752,
"grad_norm": 0.0008416337659582496,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 788884532.0,
"reward": 0.9455557703971863,
"reward_std": 0.08068245649337769,
"rewards/accuracy_reward": 0.566015625,
"rewards/brier_reward": 0.7956305265426635,
"rewards/confidence_uniqueness_reward": 0.9623886108398437,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003054162277840078,
"rewards/frontier_coverage_0": 0.10228811725974082,
"rewards/frontier_coverage_1": 0.10228811725974082,
"rewards/frontier_coverage_10": 0.10170512199401856,
"rewards/frontier_coverage_15": 0.09902632944285869,
"rewards/frontier_coverage_20": 0.07840342242270708,
"rewards/frontier_coverage_25": 0.050577325746417046,
"rewards/frontier_coverage_5": 0.10181083604693413,
"rewards/frontier_ece_reward": 0.0037714077159762384,
"rewards/frontier_entropy_batch_reward": -0.21397663354873658,
"signal/accuracy_reward/centered_abs_mean": 0.0837158203125,
"signal/accuracy_reward/group_bin_occupancy": 0.1671875,
"signal/accuracy_reward/group_std_mean": 0.11319768130779266,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04185791015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04185791015625,
"signal/advantage_abs_mean": 0.06239245980978012,
"signal/advantage_pre_scale_abs_mean": 0.06239245980978012,
"signal/advantage_pre_scale_std": 0.10159500986337662,
"signal/advantage_std": 0.10159500986337662,
"signal/brier_reward/centered_abs_mean": 0.113985575735569,
"signal/brier_reward/group_bin_occupancy": 0.82890625,
"signal/brier_reward/group_std_mean": 0.14819374084472656,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011398557387292386,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011398557387292386,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012159938551485538,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91640625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01617111321538687,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012159939156845211,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012159939156845211,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029334662482142448,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72265625,
"signal/frontier_aurc_reward/group_std_mean": 0.00483027109876275,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.666832781163975e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.666832781163975e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.15212544202804565,
"signal/frontier_coverage_0/group_bin_occupancy": 0.853125,
"signal/frontier_coverage_0/group_std_mean": 0.19812886118888856,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019015680765733123,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019015680765733123,
"signal/frontier_coverage_1/centered_abs_mean": 0.15212544202804565,
"signal/frontier_coverage_1/group_bin_occupancy": 0.853125,
"signal/frontier_coverage_1/group_std_mean": 0.19812886118888856,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019015680765733123,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019015680765733123,
"signal/frontier_coverage_10/centered_abs_mean": 0.15115560591220856,
"signal/frontier_coverage_10/group_bin_occupancy": 0.84921875,
"signal/frontier_coverage_10/group_std_mean": 0.19689476490020752,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018894450971856714,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018894450971856714,
"signal/frontier_coverage_15/centered_abs_mean": 0.14616797864437103,
"signal/frontier_coverage_15/group_bin_occupancy": 0.84921875,
"signal/frontier_coverage_15/group_std_mean": 0.1905912697315216,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001827099802903831,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001827099802903831,
"signal/frontier_coverage_20/centered_abs_mean": 0.10996298342943192,
"signal/frontier_coverage_20/group_bin_occupancy": 0.843359375,
"signal/frontier_coverage_20/group_std_mean": 0.14416728615760804,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013745372649282216,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013745372649282216,
"signal/frontier_coverage_25/centered_abs_mean": 0.05818985775113106,
"signal/frontier_coverage_25/group_bin_occupancy": 0.911328125,
"signal/frontier_coverage_25/group_std_mean": 0.07572825103998185,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007273732335306704,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007273732335306704,
"signal/frontier_coverage_5/centered_abs_mean": 0.15179600417613984,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85078125,
"signal/frontier_coverage_5/group_std_mean": 0.19772669970989226,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018974500941112637,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018974500941112637,
"signal/frontier_ece_reward/centered_abs_mean": 0.007248471491038799,
"signal/frontier_ece_reward/group_bin_occupancy": 0.81953125,
"signal/frontier_ece_reward/group_std_mean": 0.010719313845038414,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007248471258208156,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007248471258208156,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2787540197372437,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.739453125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3564418852329254,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027875401824712754,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027875401824712754,
"step": 235
},
{
"calibration/aurc": 0.299067613553852,
"calibration/batch_distribution_entropy": 0.9776422799775325,
"calibration/batch_entropy_100bins": 0.9544932057058523,
"calibration/batch_entropy_10bins": 0.9776422799775325,
"calibration/batch_entropy_50bins": 0.9740477455082059,
"calibration/batch_uniqueness": 0.9616119384765625,
"calibration/buffer_distribution_entropy": 0.9990121183367439,
"calibration/buffer_entropy_100bins": 0.9915586285603151,
"calibration/buffer_entropy_10bins": 0.9990121183367439,
"calibration/buffer_entropy_50bins": 0.9953215015311525,
"calibration/confidence_entropy": 0.5014430602492166,
"calibration/coverage@0%": 0.066015625,
"calibration/coverage@1%": 0.066015625,
"calibration/coverage@10%": 0.216015625,
"calibration/coverage@15%": 0.26328125,
"calibration/coverage@20%": 0.366015625,
"calibration/coverage@25%": 0.455859375,
"calibration/coverage@30%": 0.491015625,
"calibration/coverage@5%": 0.090625,
"calibration/ece": 0.16444165512890357,
"calibration/mean_confidence": 0.47969150508930003,
"calibration/prompt_uniqueness": 0.868505859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1003.0,
"completions/max_terminated_length": 624.2,
"completions/mean_length": 191.987890625,
"completions/mean_terminated_length": 191.72521362304687,
"completions/min_length": 80.8,
"completions/min_terminated_length": 80.8,
"epoch": 0.768,
"grad_norm": 0.001026144833303988,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 805783192.0,
"reward": 0.9215254068374634,
"reward_std": 0.07930080592632294,
"rewards/accuracy_reward": 0.51201171875,
"rewards/brier_reward": 0.8031093597412109,
"rewards/confidence_uniqueness_reward": 0.9619287371635437,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002836792590096593,
"rewards/frontier_coverage_0": 0.13882942795753478,
"rewards/frontier_coverage_1": 0.13882942795753478,
"rewards/frontier_coverage_10": 0.13846020698547362,
"rewards/frontier_coverage_15": 0.13496174067258834,
"rewards/frontier_coverage_20": 0.10879542678594589,
"rewards/frontier_coverage_25": 0.0587244875729084,
"rewards/frontier_coverage_5": 0.13846020698547362,
"rewards/frontier_ece_reward": 0.003680743183940649,
"rewards/frontier_entropy_batch_reward": -0.21932466328144073,
"signal/accuracy_reward/centered_abs_mean": 0.079571533203125,
"signal/accuracy_reward/group_bin_occupancy": 0.166015625,
"signal/accuracy_reward/group_std_mean": 0.10974450260400773,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0397857666015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0397857666015625,
"signal/advantage_abs_mean": 0.0608487643301487,
"signal/advantage_pre_scale_abs_mean": 0.0608487643301487,
"signal/advantage_pre_scale_std": 0.09865092337131501,
"signal/advantage_std": 0.09865092337131501,
"signal/brier_reward/centered_abs_mean": 0.10833943039178848,
"signal/brier_reward/group_bin_occupancy": 0.8515625,
"signal/brier_reward/group_std_mean": 0.14004457592964173,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010833943635225296,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010833943635225296,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012927094288170338,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91015625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016739430651068688,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001292709424160421,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001292709424160421,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024011209141463043,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7234375,
"signal/frontier_aurc_reward/group_std_mean": 0.003994084335863591,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.001401055371389e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.001401055371389e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.15316719114780425,
"signal/frontier_coverage_0/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_0/group_std_mean": 0.19865505993366242,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001914589968509972,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001914589968509972,
"signal/frontier_coverage_1/centered_abs_mean": 0.15316719114780425,
"signal/frontier_coverage_1/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_1/group_std_mean": 0.19865505993366242,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001914589968509972,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001914589968509972,
"signal/frontier_coverage_10/centered_abs_mean": 0.15222469270229338,
"signal/frontier_coverage_10/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_10/group_std_mean": 0.19741056561470033,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001902808714658022,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001902808714658022,
"signal/frontier_coverage_15/centered_abs_mean": 0.1450010806322098,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87578125,
"signal/frontier_coverage_15/group_std_mean": 0.18793485462665557,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018125135218724608,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018125135218724608,
"signal/frontier_coverage_20/centered_abs_mean": 0.10850205421447753,
"signal/frontier_coverage_20/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_20/group_std_mean": 0.14055884182453154,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013562757056206464,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013562757056206464,
"signal/frontier_coverage_25/centered_abs_mean": 0.05681398212909698,
"signal/frontier_coverage_25/group_bin_occupancy": 0.921875,
"signal/frontier_coverage_25/group_std_mean": 0.07298188954591751,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007101748022250831,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007101748022250831,
"signal/frontier_coverage_5/centered_abs_mean": 0.15222469270229338,
"signal/frontier_coverage_5/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_5/group_std_mean": 0.19741056561470033,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001902808714658022,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001902808714658022,
"signal/frontier_ece_reward/centered_abs_mean": 0.006277401559054851,
"signal/frontier_ece_reward/group_bin_occupancy": 0.837109375,
"signal/frontier_ece_reward/group_std_mean": 0.009239476174116135,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006277402047999203,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006277402047999203,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2777975261211395,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35362735390663147,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027779752761125563,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027779752761125563,
"step": 240
},
{
"calibration/aurc": 0.32235280813281475,
"calibration/batch_distribution_entropy": 0.9786483096457314,
"calibration/batch_entropy_100bins": 0.9500265876570528,
"calibration/batch_entropy_10bins": 0.9786483096457314,
"calibration/batch_entropy_50bins": 0.9700209996178053,
"calibration/batch_uniqueness": 0.9649993896484375,
"calibration/buffer_distribution_entropy": 0.9989889119809028,
"calibration/buffer_entropy_100bins": 0.9915393121863978,
"calibration/buffer_entropy_10bins": 0.9989889119809028,
"calibration/buffer_entropy_50bins": 0.9953476043016061,
"calibration/confidence_entropy": 0.4872507579959541,
"calibration/coverage@0%": 0.016796875,
"calibration/coverage@1%": 0.016796875,
"calibration/coverage@10%": 0.153515625,
"calibration/coverage@15%": 0.271875,
"calibration/coverage@20%": 0.31484375,
"calibration/coverage@25%": 0.343359375,
"calibration/coverage@30%": 0.3921875,
"calibration/coverage@5%": 0.114453125,
"calibration/ece": 0.15300725756631883,
"calibration/mean_confidence": 0.509216670802342,
"calibration/prompt_uniqueness": 0.8611328125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1067.4,
"completions/max_terminated_length": 785.6,
"completions/mean_length": 189.0556640625,
"completions/mean_terminated_length": 188.79233093261718,
"completions/min_length": 81.2,
"completions/min_terminated_length": 81.2,
"epoch": 0.784,
"grad_norm": 0.0008188265492208302,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 822893490.0,
"reward": 0.9416593551635742,
"reward_std": 0.08285669684410095,
"rewards/accuracy_reward": 0.5642578125,
"rewards/brier_reward": 0.7843694448471069,
"rewards/confidence_uniqueness_reward": 0.9650574326515198,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003067029034718871,
"rewards/frontier_coverage_0": 0.08356368988752365,
"rewards/frontier_coverage_1": 0.08356368988752365,
"rewards/frontier_coverage_10": 0.08326268717646598,
"rewards/frontier_coverage_15": 0.07886564061045646,
"rewards/frontier_coverage_20": 0.06277668662369251,
"rewards/frontier_coverage_25": 0.044081108272075654,
"rewards/frontier_coverage_5": 0.08318910598754883,
"rewards/frontier_ece_reward": 0.002441513957455754,
"rewards/frontier_entropy_batch_reward": -0.22011671662330629,
"signal/accuracy_reward/centered_abs_mean": 0.09090576171875,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.12054677605628968,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045452880859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045452880859375,
"signal/advantage_abs_mean": 0.06448552757501602,
"signal/advantage_pre_scale_abs_mean": 0.06448552757501602,
"signal/advantage_pre_scale_std": 0.10121935606002808,
"signal/advantage_std": 0.10121935606002808,
"signal/brier_reward/centered_abs_mean": 0.1141832172870636,
"signal/brier_reward/group_bin_occupancy": 0.8546875,
"signal/brier_reward/group_std_mean": 0.14592179358005525,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011418322287499904,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011418322287499904,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012644784711301326,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8859375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01663502026349306,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012644784990698099,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012644784990698099,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026621847413480283,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728515625,
"signal/frontier_aurc_reward/group_std_mean": 0.0041458617430180315,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.327731028548442e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.327731028548442e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1563648372888565,
"signal/frontier_coverage_0/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_0/group_std_mean": 0.200226292014122,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001954560517333448,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001954560517333448,
"signal/frontier_coverage_1/centered_abs_mean": 0.1563648372888565,
"signal/frontier_coverage_1/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_1/group_std_mean": 0.200226292014122,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001954560517333448,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001954560517333448,
"signal/frontier_coverage_10/centered_abs_mean": 0.1553775906562805,
"signal/frontier_coverage_10/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_10/group_std_mean": 0.19900963306427003,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019422198878601194,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019422198878601194,
"signal/frontier_coverage_15/centered_abs_mean": 0.1479180335998535,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_15/group_std_mean": 0.18967563509941102,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018489754293113947,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018489754293113947,
"signal/frontier_coverage_20/centered_abs_mean": 0.10082450807094574,
"signal/frontier_coverage_20/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_20/group_std_mean": 0.1297900453209877,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001260306383483112,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001260306383483112,
"signal/frontier_coverage_25/centered_abs_mean": 0.055328131467103955,
"signal/frontier_coverage_25/group_bin_occupancy": 0.928125,
"signal/frontier_coverage_25/group_std_mean": 0.07085389196872711,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006916016573086381,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006916016573086381,
"signal/frontier_coverage_5/centered_abs_mean": 0.15557830333709716,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_5/group_std_mean": 0.19926558434963226,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001944728777743876,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001944728777743876,
"signal/frontier_ece_reward/centered_abs_mean": 0.006715606153011322,
"signal/frontier_ece_reward/group_bin_occupancy": 0.83828125,
"signal/frontier_ece_reward/group_std_mean": 0.009772182628512382,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006715606432408094,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006715606432408094,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2859824955463409,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3597340643405914,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028598250076174735,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028598250076174735,
"step": 245
},
{
"calibration/aurc": 0.21915763575922814,
"calibration/batch_distribution_entropy": 0.9810695570789931,
"calibration/batch_entropy_100bins": 0.9486454306483759,
"calibration/batch_entropy_10bins": 0.9810695570789931,
"calibration/batch_entropy_50bins": 0.9722712681327159,
"calibration/batch_uniqueness": 0.965252685546875,
"calibration/buffer_distribution_entropy": 0.9990129339746321,
"calibration/buffer_entropy_100bins": 0.9912903876248045,
"calibration/buffer_entropy_10bins": 0.9990129339746321,
"calibration/buffer_entropy_50bins": 0.9953314983930802,
"calibration/confidence_entropy": 0.4940070201234362,
"calibration/coverage@0%": 0.041015625,
"calibration/coverage@1%": 0.041015625,
"calibration/coverage@10%": 0.23515625,
"calibration/coverage@15%": 0.365625,
"calibration/coverage@20%": 0.521484375,
"calibration/coverage@25%": 0.6296875,
"calibration/coverage@30%": 0.7234375,
"calibration/coverage@5%": 0.091015625,
"calibration/ece": 0.1043095249640625,
"calibration/mean_confidence": 0.5099823944953126,
"calibration/prompt_uniqueness": 0.863818359375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 913.4,
"completions/max_terminated_length": 492.4,
"completions/mean_length": 185.3513671875,
"completions/mean_terminated_length": 184.95630493164063,
"completions/min_length": 88.0,
"completions/min_terminated_length": 88.0,
"epoch": 0.8,
"grad_norm": 0.0009886363986879587,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 839802048.0,
"reward": 0.9549939274787903,
"reward_std": 0.08174641579389572,
"rewards/accuracy_reward": 0.59150390625,
"rewards/brier_reward": 0.809298062324524,
"rewards/confidence_uniqueness_reward": 0.9655136346817017,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.00310264159925282,
"rewards/frontier_coverage_0": 0.08750025108456612,
"rewards/frontier_coverage_1": 0.08750025108456612,
"rewards/frontier_coverage_10": 0.08737820237874985,
"rewards/frontier_coverage_15": 0.08418880626559258,
"rewards/frontier_coverage_20": 0.06368328407406806,
"rewards/frontier_coverage_25": 0.04929944053292275,
"rewards/frontier_coverage_5": 0.08737820237874985,
"rewards/frontier_ece_reward": 0.0034091237001121046,
"rewards/frontier_entropy_batch_reward": -0.25231444239616396,
"signal/accuracy_reward/centered_abs_mean": 0.084820556640625,
"signal/accuracy_reward/group_bin_occupancy": 0.165625,
"signal/accuracy_reward/group_std_mean": 0.11230973601341247,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0424102783203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0424102783203125,
"signal/advantage_abs_mean": 0.0636213093996048,
"signal/advantage_pre_scale_abs_mean": 0.0636213093996048,
"signal/advantage_pre_scale_std": 0.10311011075973511,
"signal/advantage_std": 0.10311011075973511,
"signal/brier_reward/centered_abs_mean": 0.1033732384443283,
"signal/brier_reward/group_bin_occupancy": 0.84375,
"signal/brier_reward/group_std_mean": 0.1343769446015358,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010337324067950248,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010337324067950248,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013043990544974803,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87109375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017386937327682973,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001304399105720222,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001304399105720222,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00283603323623538,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.704296875,
"signal/frontier_aurc_reward/group_std_mean": 0.0045263932552188635,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.545041545294225e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.545041545294225e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.13621854037046432,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8578125,
"signal/frontier_coverage_0/group_std_mean": 0.17529793679714203,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017027317779138684,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017027317779138684,
"signal/frontier_coverage_1/centered_abs_mean": 0.13621854037046432,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8578125,
"signal/frontier_coverage_1/group_std_mean": 0.17529793679714203,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017027317779138684,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017027317779138684,
"signal/frontier_coverage_10/centered_abs_mean": 0.13600390702486037,
"signal/frontier_coverage_10/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_10/group_std_mean": 0.17503868341445922,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017000488704070448,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017000488704070448,
"signal/frontier_coverage_15/centered_abs_mean": 0.12785129249095917,
"signal/frontier_coverage_15/group_bin_occupancy": 0.859765625,
"signal/frontier_coverage_15/group_std_mean": 0.16478919386863708,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015981412259861826,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015981412259861826,
"signal/frontier_coverage_20/centered_abs_mean": 0.08313901722431183,
"signal/frontier_coverage_20/group_bin_occupancy": 0.868359375,
"signal/frontier_coverage_20/group_std_mean": 0.10833462625741959,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00103923772694543,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00103923772694543,
"signal/frontier_coverage_25/centered_abs_mean": 0.05012721195816994,
"signal/frontier_coverage_25/group_bin_occupancy": 0.926953125,
"signal/frontier_coverage_25/group_std_mean": 0.06447599828243256,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006265901494771243,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006265901494771243,
"signal/frontier_coverage_5/centered_abs_mean": 0.13600390702486037,
"signal/frontier_coverage_5/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_5/group_std_mean": 0.17503868341445922,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017000488704070448,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017000488704070448,
"signal/frontier_ece_reward/centered_abs_mean": 0.007046621013432741,
"signal/frontier_ece_reward/group_bin_occupancy": 0.829296875,
"signal/frontier_ece_reward/group_std_mean": 0.010701733268797397,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000704662105999887,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000704662105999887,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29779070019721987,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7203125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36754211187362673,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02977906949818134,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02977906949818134,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.4222622649766603,
"eval_calibration/batch_distribution_entropy": 0.93831284978888,
"eval_calibration/batch_entropy_100bins": 0.7008354586552061,
"eval_calibration/batch_entropy_10bins": 0.93831284978888,
"eval_calibration/batch_entropy_50bins": 0.7678645493443049,
"eval_calibration/batch_uniqueness": 0.9052734375,
"eval_calibration/buffer_distribution_entropy": 0.999063500343571,
"eval_calibration/buffer_entropy_100bins": 0.9911236376097536,
"eval_calibration/buffer_entropy_10bins": 0.999063500343571,
"eval_calibration/buffer_entropy_50bins": 0.9954196166686817,
"eval_calibration/confidence_entropy": 0.48469888985633597,
"eval_calibration/coverage@0%": 0.0703125,
"eval_calibration/coverage@1%": 0.0703125,
"eval_calibration/coverage@10%": 0.0703125,
"eval_calibration/coverage@15%": 0.0703125,
"eval_calibration/coverage@20%": 0.0703125,
"eval_calibration/coverage@25%": 0.140625,
"eval_calibration/coverage@30%": 0.4296875,
"eval_calibration/coverage@5%": 0.0703125,
"eval_calibration/ece": 0.193647390625,
"eval_calibration/mean_confidence": 0.47742710937499994,
"eval_calibration/prompt_uniqueness": 0.9052734375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 417.75,
"eval_completions/max_terminated_length": 417.75,
"eval_completions/mean_length": 189.96800994873047,
"eval_completions/mean_terminated_length": 189.96800994873047,
"eval_completions/min_length": 99.5,
"eval_completions/min_terminated_length": 99.5,
"eval_loss": 0.0,
"eval_num_tokens": 839802048.0,
"eval_reward": 0.8031501024961472,
"eval_reward_std": 0.2302834540605545,
"eval_rewards/accuracy_reward": 0.43359375,
"eval_rewards/brier_reward": 0.8097970336675644,
"eval_rewards/confidence_uniqueness_reward": 0.909912109375,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0030607732478529215,
"eval_rewards/frontier_coverage_0": 0.19227121397852898,
"eval_rewards/frontier_coverage_1": 0.19227121397852898,
"eval_rewards/frontier_coverage_10": 0.19090014696121216,
"eval_rewards/frontier_coverage_15": 0.17611178383231163,
"eval_rewards/frontier_coverage_20": 0.11594182625412941,
"eval_rewards/frontier_coverage_25": 0.05809914506971836,
"eval_rewards/frontier_coverage_5": 0.19090014696121216,
"eval_rewards/frontier_ece_reward": 0.0046437275595963,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 21.1941,
"eval_samples_per_second": 23.592,
"eval_signal/accuracy_reward/centered_abs_mean": 0.474853515625,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4946432411670685,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2374267578125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2374267578125,
"eval_signal/advantage_abs_mean": 0.21750148758292198,
"eval_signal/advantage_pre_scale_abs_mean": 0.21750148758292198,
"eval_signal/advantage_pre_scale_std": 0.22778696939349174,
"eval_signal/advantage_std": 0.22778696939349174,
"eval_signal/brier_reward/centered_abs_mean": 0.17260025814175606,
"eval_signal/brier_reward/group_bin_occupancy": 0.8671875,
"eval_signal/brier_reward/group_std_mean": 0.2234898954629898,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01726002711802721,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01726002711802721,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0348052978515625,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.34375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04058399423956871,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034805297618731856,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034805297618731856,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0038759367307648063,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7421875,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006893252138979733,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.844920840696432e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.844920840696432e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.34364357590675354,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_0/group_std_mean": 0.41618141531944275,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004295544931665063,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004295544931665063,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.34364357590675354,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_1/group_std_mean": 0.41618141531944275,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004295544931665063,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004295544931665063,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3414214551448822,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4136466532945633,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004267768119461834,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004267768119461834,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3166361153125763,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_15/group_std_mean": 0.3851661831140518,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003957951499614865,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003957951499614865,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.19747909903526306,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9140625,
"eval_signal/frontier_coverage_20/group_std_mean": 0.24594665691256523,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024684888776391745,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024684888776391745,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08342637866735458,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.10634090937674046,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010428297682665288,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010428297682665288,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3414214551448822,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4136466532945633,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004267768119461834,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004267768119461834,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.00797420903109014,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.90625,
"eval_signal/frontier_ece_reward/group_std_mean": 0.011368014384061098,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007974208710948005,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007974208710948005,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.189,
"step": 250
},
{
"calibration/aurc": 0.23524356922838585,
"calibration/batch_distribution_entropy": 0.9748226569066292,
"calibration/batch_entropy_100bins": 0.9464532567711041,
"calibration/batch_entropy_10bins": 0.9748226569066292,
"calibration/batch_entropy_50bins": 0.9691353813672773,
"calibration/batch_uniqueness": 0.9648882276804489,
"calibration/buffer_distribution_entropy": 0.9989431393987553,
"calibration/buffer_entropy_100bins": 0.9907328547968023,
"calibration/buffer_entropy_10bins": 0.9989431393987553,
"calibration/buffer_entropy_50bins": 0.9952267925200537,
"calibration/confidence_entropy": 0.48285694613510166,
"calibration/coverage@0%": 0.0140625,
"calibration/coverage@1%": 0.0140625,
"calibration/coverage@10%": 0.10546875,
"calibration/coverage@15%": 0.2125,
"calibration/coverage@20%": 0.3839920193248532,
"calibration/coverage@25%": 0.6714377446183952,
"calibration/coverage@30%": 0.798828125,
"calibration/coverage@5%": 0.03046875,
"calibration/ece": 0.13733046909174487,
"calibration/mean_confidence": 0.5263182425337444,
"calibration/prompt_uniqueness": 0.8637435674915451,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 753.4,
"completions/max_terminated_length": 585.2,
"completions/mean_length": 182.904296875,
"completions/mean_terminated_length": 182.77190551757812,
"completions/min_length": 88.8,
"completions/min_terminated_length": 88.8,
"epoch": 0.816,
"grad_norm": 0.001030449173413217,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 856774156.0,
"reward": 0.9531601071357727,
"reward_std": 0.08292276561260223,
"rewards/accuracy_reward": 0.59013671875,
"rewards/brier_reward": 0.7899926781654358,
"rewards/confidence_uniqueness_reward": 0.9667759299278259,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0026666073594242335,
"rewards/frontier_coverage_0": 0.06293640360236168,
"rewards/frontier_coverage_1": 0.06293640360236168,
"rewards/frontier_coverage_10": 0.06290345415472984,
"rewards/frontier_coverage_15": 0.06183330789208412,
"rewards/frontier_coverage_20": 0.052926937490701674,
"rewards/frontier_coverage_25": 0.04199915751814842,
"rewards/frontier_coverage_5": 0.06285227611660957,
"rewards/frontier_ece_reward": 0.002211177465505898,
"rewards/frontier_entropy_batch_reward": -0.22828937768936158,
"signal/accuracy_reward/centered_abs_mean": 0.090277099609375,
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
"signal/accuracy_reward/group_std_mean": 0.11830563694238663,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451385498046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451385498046875,
"signal/advantage_abs_mean": 0.06491014659404755,
"signal/advantage_pre_scale_abs_mean": 0.06491014659404755,
"signal/advantage_pre_scale_std": 0.1037605032324791,
"signal/advantage_std": 0.1037605032324791,
"signal/brier_reward/centered_abs_mean": 0.11094661056995392,
"signal/brier_reward/group_bin_occupancy": 0.86171875,
"signal/brier_reward/group_std_mean": 0.1411285489797592,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011094661056995391,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011094661056995391,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012302939221262932,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.869140625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01582129541784525,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012302939547225833,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012302939547225833,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002399337338283658,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.738671875,
"signal/frontier_aurc_reward/group_std_mean": 0.0037662754766643047,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9991718110977673e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9991718110977673e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1482946664094925,
"signal/frontier_coverage_0/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_0/group_std_mean": 0.19012218713760376,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001853683264926076,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001853683264926076,
"signal/frontier_coverage_1/centered_abs_mean": 0.1482946664094925,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_1/group_std_mean": 0.19012218713760376,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001853683264926076,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001853683264926076,
"signal/frontier_coverage_10/centered_abs_mean": 0.14743364751338958,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_10/group_std_mean": 0.18905034363269807,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018429205985739828,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018429205985739828,
"signal/frontier_coverage_15/centered_abs_mean": 0.13868501037359238,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_15/group_std_mean": 0.17817612886428832,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017335626529529692,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017335626529529692,
"signal/frontier_coverage_20/centered_abs_mean": 0.08847524970769882,
"signal/frontier_coverage_20/group_bin_occupancy": 0.876953125,
"signal/frontier_coverage_20/group_std_mean": 0.11435707211494446,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011059406446292997,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011059406446292997,
"signal/frontier_coverage_25/centered_abs_mean": 0.05190168023109436,
"signal/frontier_coverage_25/group_bin_occupancy": 0.920703125,
"signal/frontier_coverage_25/group_std_mean": 0.06661412790417671,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006487710168585181,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006487710168585181,
"signal/frontier_coverage_5/centered_abs_mean": 0.1475912719964981,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_5/group_std_mean": 0.18924154639244078,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018448908813297749,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018448908813297749,
"signal/frontier_ece_reward/centered_abs_mean": 0.006282018590718507,
"signal/frontier_ece_reward/group_bin_occupancy": 0.843359375,
"signal/frontier_ece_reward/group_std_mean": 0.009374569542706013,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006282018381170929,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006282018381170929,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29568083882331847,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.717578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3687551856040955,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029568084701895713,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029568084701895713,
"step": 255
},
{
"calibration/aurc": 0.2818856091214798,
"calibration/batch_distribution_entropy": 0.9755369371648703,
"calibration/batch_entropy_100bins": 0.9477719023713969,
"calibration/batch_entropy_10bins": 0.9755369371648703,
"calibration/batch_entropy_50bins": 0.9714799065497696,
"calibration/batch_uniqueness": 0.9651092529296875,
"calibration/buffer_distribution_entropy": 0.9988617474514427,
"calibration/buffer_entropy_100bins": 0.9901376392375262,
"calibration/buffer_entropy_10bins": 0.9988617474514427,
"calibration/buffer_entropy_50bins": 0.9950786388567818,
"calibration/confidence_entropy": 0.49679534425027755,
"calibration/coverage@0%": 0.039453125,
"calibration/coverage@1%": 0.039453125,
"calibration/coverage@10%": 0.203125,
"calibration/coverage@15%": 0.248046875,
"calibration/coverage@20%": 0.32890625,
"calibration/coverage@25%": 0.432421875,
"calibration/coverage@30%": 0.51171875,
"calibration/coverage@5%": 0.160546875,
"calibration/ece": 0.11443988386113282,
"calibration/mean_confidence": 0.4846168869722266,
"calibration/prompt_uniqueness": 0.867236328125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 855.6,
"completions/max_terminated_length": 669.8,
"completions/mean_length": 183.81455078125,
"completions/mean_terminated_length": 183.682763671875,
"completions/min_length": 81.4,
"completions/min_terminated_length": 81.4,
"epoch": 0.832,
"grad_norm": 0.0009330803877674043,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 873664769.0,
"reward": 0.9404654026031494,
"reward_std": 0.08173245638608932,
"rewards/accuracy_reward": 0.55751953125,
"rewards/brier_reward": 0.8090359687805175,
"rewards/confidence_uniqueness_reward": 0.9651769518852233,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002471396827604622,
"rewards/frontier_coverage_0": 0.11028219759464264,
"rewards/frontier_coverage_1": 0.11028219759464264,
"rewards/frontier_coverage_10": 0.10962048023939133,
"rewards/frontier_coverage_15": 0.10224549621343612,
"rewards/frontier_coverage_20": 0.07180028259754181,
"rewards/frontier_coverage_25": 0.053031648695468905,
"rewards/frontier_coverage_5": 0.10962048023939133,
"rewards/frontier_ece_reward": 0.0031542435754090548,
"rewards/frontier_entropy_batch_reward": -0.24287400245666504,
"signal/accuracy_reward/centered_abs_mean": 0.090777587890625,
"signal/accuracy_reward/group_bin_occupancy": 0.167578125,
"signal/accuracy_reward/group_std_mean": 0.11992976069450378,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0453887939453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0453887939453125,
"signal/advantage_abs_mean": 0.06406652480363846,
"signal/advantage_pre_scale_abs_mean": 0.06406652480363846,
"signal/advantage_pre_scale_std": 0.10220163762569427,
"signal/advantage_std": 0.10220163762569427,
"signal/brier_reward/centered_abs_mean": 0.10323716104030609,
"signal/brier_reward/group_bin_occupancy": 0.856640625,
"signal/brier_reward/group_std_mean": 0.1322301909327507,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010323716327548027,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010323716327548027,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012594187259674072,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.855078125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016454468481242657,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001259418693371117,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001259418693371117,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002055089036002755,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.741796875,
"signal/frontier_aurc_reward/group_std_mean": 0.003259465633891523,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5688614914542996e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5688614914542996e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.14981609880924224,
"signal/frontier_coverage_0/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_0/group_std_mean": 0.19165619909763337,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018727012909948825,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018727012909948825,
"signal/frontier_coverage_1/centered_abs_mean": 0.14981609880924224,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86796875,
"signal/frontier_coverage_1/group_std_mean": 0.19165619909763337,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018727012909948825,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018727012909948825,
"signal/frontier_coverage_10/centered_abs_mean": 0.14883655905723572,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_10/group_std_mean": 0.19037957787513732,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001860457076691091,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001860457076691091,
"signal/frontier_coverage_15/centered_abs_mean": 0.13789782375097276,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_15/group_std_mean": 0.17621307969093322,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017237228574231267,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017237228574231267,
"signal/frontier_coverage_20/centered_abs_mean": 0.08962543904781342,
"signal/frontier_coverage_20/group_bin_occupancy": 0.87890625,
"signal/frontier_coverage_20/group_std_mean": 0.11485566943883896,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011203179834410547,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011203179834410547,
"signal/frontier_coverage_25/centered_abs_mean": 0.052447068691253665,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9203125,
"signal/frontier_coverage_25/group_std_mean": 0.06672648042440414,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006555883679538965,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006555883679538965,
"signal/frontier_coverage_5/centered_abs_mean": 0.14883655905723572,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_5/group_std_mean": 0.19037957787513732,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001860457076691091,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001860457076691091,
"signal/frontier_ece_reward/centered_abs_mean": 0.006253997515887022,
"signal/frontier_ece_reward/group_bin_occupancy": 0.85625,
"signal/frontier_ece_reward/group_std_mean": 0.009096156992018222,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006253997562453151,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006253997562453151,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.290888249874115,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71484375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3610431671142578,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029088825359940527,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029088825359940527,
"step": 260
},
{
"calibration/aurc": 0.3199682212623704,
"calibration/batch_distribution_entropy": 0.9698417146444305,
"calibration/batch_entropy_100bins": 0.9435716120808323,
"calibration/batch_entropy_10bins": 0.9698417146444305,
"calibration/batch_entropy_50bins": 0.9666622314375889,
"calibration/batch_uniqueness": 0.96644287109375,
"calibration/buffer_distribution_entropy": 0.9989431015829364,
"calibration/buffer_entropy_100bins": 0.9895180210867552,
"calibration/buffer_entropy_10bins": 0.9989431015829364,
"calibration/buffer_entropy_50bins": 0.9950208033407677,
"calibration/confidence_entropy": 0.4952000686126718,
"calibration/coverage@0%": 0.037109375,
"calibration/coverage@1%": 0.04140625,
"calibration/coverage@10%": 0.1703125,
"calibration/coverage@15%": 0.258984375,
"calibration/coverage@20%": 0.419140625,
"calibration/coverage@25%": 0.47734375,
"calibration/coverage@30%": 0.519921875,
"calibration/coverage@5%": 0.082421875,
"calibration/ece": 0.1614252955334596,
"calibration/mean_confidence": 0.5468318580334597,
"calibration/prompt_uniqueness": 0.86171875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 783.8,
"completions/max_terminated_length": 615.2,
"completions/mean_length": 182.1521484375,
"completions/mean_terminated_length": 182.01974487304688,
"completions/min_length": 84.8,
"completions/min_terminated_length": 84.8,
"epoch": 0.848,
"grad_norm": 0.0010821467731148005,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 890544375.0,
"reward": 0.930538809299469,
"reward_std": 0.08106714338064194,
"rewards/accuracy_reward": 0.5341796875,
"rewards/brier_reward": 0.8012025237083436,
"rewards/confidence_uniqueness_reward": 0.9661448240280152,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.003271967126056552,
"rewards/frontier_coverage_0": 0.11554919332265853,
"rewards/frontier_coverage_1": 0.11554919332265853,
"rewards/frontier_coverage_10": 0.11485711932182312,
"rewards/frontier_coverage_15": 0.11126702874898911,
"rewards/frontier_coverage_20": 0.0689346432685852,
"rewards/frontier_coverage_25": 0.04922807216644287,
"rewards/frontier_coverage_5": 0.11537581384181976,
"rewards/frontier_ece_reward": 0.0030869925394654274,
"rewards/frontier_entropy_batch_reward": -0.22139265537261962,
"signal/accuracy_reward/centered_abs_mean": 0.08153076171875,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.11394334435462952,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040765380859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.040765380859375,
"signal/advantage_abs_mean": 0.061507892608642575,
"signal/advantage_pre_scale_abs_mean": 0.061507892608642575,
"signal/advantage_pre_scale_std": 0.10090996772050857,
"signal/advantage_std": 0.10090996772050857,
"signal/brier_reward/centered_abs_mean": 0.10592394173145295,
"signal/brier_reward/group_bin_occupancy": 0.841796875,
"signal/brier_reward/group_std_mean": 0.13790196180343628,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01059239376336336,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01059239376336336,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012609278596937657,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.865234375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016177338361740113,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012609278550371529,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012609278550371529,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029984854627400637,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721875,
"signal/frontier_aurc_reward/group_std_mean": 0.0048636754509061575,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7481067192857156e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7481067192857156e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1415121629834175,
"signal/frontier_coverage_0/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_0/group_std_mean": 0.18623048067092896,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017689020838588475,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017689020838588475,
"signal/frontier_coverage_1/centered_abs_mean": 0.1415121629834175,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_1/group_std_mean": 0.18623048067092896,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017689020838588475,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017689020838588475,
"signal/frontier_coverage_10/centered_abs_mean": 0.14053474068641664,
"signal/frontier_coverage_10/group_bin_occupancy": 0.851171875,
"signal/frontier_coverage_10/group_std_mean": 0.18495951294898988,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017566842725500464,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017566842725500464,
"signal/frontier_coverage_15/centered_abs_mean": 0.1342177927494049,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
"signal/frontier_coverage_15/group_std_mean": 0.17671539783477783,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016777224140241743,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016777224140241743,
"signal/frontier_coverage_20/centered_abs_mean": 0.08487182259559631,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88359375,
"signal/frontier_coverage_20/group_std_mean": 0.11134538352489472,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010608977987430989,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010608977987430989,
"signal/frontier_coverage_25/centered_abs_mean": 0.05198915079236031,
"signal/frontier_coverage_25/group_bin_occupancy": 0.91640625,
"signal/frontier_coverage_25/group_std_mean": 0.06686145663261414,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000649864412844181,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000649864412844181,
"signal/frontier_coverage_5/centered_abs_mean": 0.14084831327199937,
"signal/frontier_coverage_5/group_bin_occupancy": 0.851171875,
"signal/frontier_coverage_5/group_std_mean": 0.18535879552364348,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017606039065867663,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017606039065867663,
"signal/frontier_ece_reward/centered_abs_mean": 0.0055714274756610395,
"signal/frontier_ece_reward/group_bin_occupancy": 0.877734375,
"signal/frontier_ece_reward/group_std_mean": 0.007441604882478714,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005571427405811846,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005571427405811846,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2792421877384186,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71484375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35623074769973756,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027924218401312827,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027924218401312827,
"step": 265
},
{
"calibration/aurc": 0.26688358629600345,
"calibration/batch_distribution_entropy": 0.956438995609551,
"calibration/batch_entropy_100bins": 0.9302209820641183,
"calibration/batch_entropy_10bins": 0.956438995609551,
"calibration/batch_entropy_50bins": 0.9543006144858946,
"calibration/batch_uniqueness": 0.96548082464166,
"calibration/buffer_distribution_entropy": 0.9990004575823258,
"calibration/buffer_entropy_100bins": 0.9889268859990621,
"calibration/buffer_entropy_10bins": 0.9990004575823258,
"calibration/buffer_entropy_50bins": 0.99498162743099,
"calibration/confidence_entropy": 0.48691284355710457,
"calibration/coverage@0%": 0.01328125,
"calibration/coverage@1%": 0.01328125,
"calibration/coverage@10%": 0.168359375,
"calibration/coverage@15%": 0.240234375,
"calibration/coverage@20%": 0.323828125,
"calibration/coverage@25%": 0.4125,
"calibration/coverage@30%": 0.5446076932485322,
"calibration/coverage@5%": 0.084375,
"calibration/ece": 0.1455257910339714,
"calibration/mean_confidence": 0.6091981775937194,
"calibration/prompt_uniqueness": 0.8710765763202393,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1098.6,
"completions/max_terminated_length": 918.8,
"completions/mean_length": 181.7109375,
"completions/mean_terminated_length": 181.44696960449218,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.864,
"grad_norm": 0.0010378322331234813,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 907391911.0,
"reward": 0.9530243515968323,
"reward_std": 0.08318499326705933,
"rewards/accuracy_reward": 0.59248046875,
"rewards/brier_reward": 0.7973593950271607,
"rewards/confidence_uniqueness_reward": 0.9641671776771545,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002714644838124514,
"rewards/frontier_coverage_0": 0.07732260525226593,
"rewards/frontier_coverage_1": 0.07732260525226593,
"rewards/frontier_coverage_10": 0.07700667977333069,
"rewards/frontier_coverage_15": 0.0751921996474266,
"rewards/frontier_coverage_20": 0.056314506381750104,
"rewards/frontier_coverage_25": 0.05060553103685379,
"rewards/frontier_coverage_5": 0.0774739071726799,
"rewards/frontier_ece_reward": 0.002765231346711516,
"rewards/frontier_entropy_batch_reward": -0.2565395474433899,
"signal/accuracy_reward/centered_abs_mean": 0.090386962890625,
"signal/accuracy_reward/group_bin_occupancy": 0.166796875,
"signal/accuracy_reward/group_std_mean": 0.11810308396816253,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451934814453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451934814453125,
"signal/advantage_abs_mean": 0.0654950737953186,
"signal/advantage_pre_scale_abs_mean": 0.0654950737953186,
"signal/advantage_pre_scale_std": 0.10269584357738495,
"signal/advantage_std": 0.10269584357738495,
"signal/brier_reward/centered_abs_mean": 0.10956264287233353,
"signal/brier_reward/group_bin_occupancy": 0.851171875,
"signal/brier_reward/group_std_mean": 0.1409148782491684,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010956264473497868,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010956264473497868,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014184213988482953,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.834765625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018567436560988426,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014184214174747466,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014184214174747466,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027129411697387694,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125,
"signal/frontier_aurc_reward/group_std_mean": 0.0043897018767893314,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.391176614968572e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.391176614968572e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1481944888830185,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8671875,
"signal/frontier_coverage_0/group_std_mean": 0.18974127769470214,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018524311250075697,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018524311250075697,
"signal/frontier_coverage_1/centered_abs_mean": 0.1481944888830185,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8671875,
"signal/frontier_coverage_1/group_std_mean": 0.18974127769470214,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018524311250075697,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018524311250075697,
"signal/frontier_coverage_10/centered_abs_mean": 0.14702675938606263,
"signal/frontier_coverage_10/group_bin_occupancy": 0.865234375,
"signal/frontier_coverage_10/group_std_mean": 0.1882396310567856,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018378345994278789,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018378345994278789,
"signal/frontier_coverage_15/centered_abs_mean": 0.14057752192020417,
"signal/frontier_coverage_15/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_15/group_std_mean": 0.1800085186958313,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017572190146893263,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017572190146893263,
"signal/frontier_coverage_20/centered_abs_mean": 0.08514007031917573,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_20/group_std_mean": 0.10977080911397934,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010642508743330837,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010642508743330837,
"signal/frontier_coverage_25/centered_abs_mean": 0.05421077758073807,
"signal/frontier_coverage_25/group_bin_occupancy": 0.925,
"signal/frontier_coverage_25/group_std_mean": 0.06963766515254974,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006776347407139837,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006776347407139837,
"signal/frontier_coverage_5/centered_abs_mean": 0.1480186551809311,
"signal/frontier_coverage_5/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_5/group_std_mean": 0.18951984047889708,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018502332037314772,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018502332037314772,
"signal/frontier_ece_reward/centered_abs_mean": 0.005847407225519419,
"signal/frontier_ece_reward/group_bin_occupancy": 0.850390625,
"signal/frontier_ece_reward/group_std_mean": 0.007833207491785288,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005847407272085547,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005847407272085547,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3008589863777161,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.715625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.371851509809494,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03008589893579483,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03008589893579483,
"step": 270
},
{
"calibration/aurc": 0.3599908256968588,
"calibration/batch_distribution_entropy": 0.9765293546573472,
"calibration/batch_entropy_100bins": 0.9479550555184086,
"calibration/batch_entropy_10bins": 0.9765293546573472,
"calibration/batch_entropy_50bins": 0.9718896673551072,
"calibration/batch_uniqueness": 0.9620710457371701,
"calibration/buffer_distribution_entropy": 0.9989072670831562,
"calibration/buffer_entropy_100bins": 0.9881814276419251,
"calibration/buffer_entropy_10bins": 0.9989072670831562,
"calibration/buffer_entropy_50bins": 0.9947699849601171,
"calibration/confidence_entropy": 0.4855467010850557,
"calibration/coverage@0%": 0.016816750244618393,
"calibration/coverage@1%": 0.016816750244618393,
"calibration/coverage@10%": 0.02775807240704501,
"calibration/coverage@15%": 0.06486897627201565,
"calibration/coverage@20%": 0.1316910775440313,
"calibration/coverage@25%": 0.2810864114481409,
"calibration/coverage@30%": 0.3869679549902153,
"calibration/coverage@5%": 0.016816750244618393,
"calibration/ece": 0.13908466566091793,
"calibration/mean_confidence": 0.47106601660332437,
"calibration/prompt_uniqueness": 0.851669403616025,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 730.0,
"completions/max_terminated_length": 618.6,
"completions/mean_length": 175.8171875,
"completions/mean_terminated_length": 175.41910095214843,
"completions/min_length": 77.6,
"completions/min_terminated_length": 77.6,
"epoch": 0.88,
"grad_norm": 0.0011793546145781875,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 924339351.0,
"reward": 0.9199577450752259,
"reward_std": 0.08229250609874725,
"rewards/accuracy_reward": 0.5189453125,
"rewards/brier_reward": 0.7955085277557373,
"rewards/confidence_uniqueness_reward": 0.9621264696121216,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002559547405689955,
"rewards/frontier_coverage_0": 0.12974209040403367,
"rewards/frontier_coverage_1": 0.12974209040403367,
"rewards/frontier_coverage_10": 0.12853662222623824,
"rewards/frontier_coverage_15": 0.12217865586280822,
"rewards/frontier_coverage_20": 0.0753389410674572,
"rewards/frontier_coverage_25": 0.05095566734671593,
"rewards/frontier_coverage_5": 0.12889423370361328,
"rewards/frontier_ece_reward": 0.0026560436934232714,
"rewards/frontier_entropy_batch_reward": -0.24932879209518433,
"signal/accuracy_reward/centered_abs_mean": 0.09031982421875,
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
"signal/accuracy_reward/group_std_mean": 0.11694404035806656,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045159912109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045159912109375,
"signal/advantage_abs_mean": 0.06529273688793183,
"signal/advantage_pre_scale_abs_mean": 0.06529273688793183,
"signal/advantage_pre_scale_std": 0.102424056828022,
"signal/advantage_std": 0.102424056828022,
"signal/brier_reward/centered_abs_mean": 0.1097784698009491,
"signal/brier_reward/group_bin_occupancy": 0.84375,
"signal/brier_reward/group_std_mean": 0.14118833541870118,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010977847129106521,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010977847129106521,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014931019768118859,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.850390625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019864151254296303,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014931020326912404,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014931020326912404,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814434766769,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021499829599633813,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.725390625,
"signal/frontier_aurc_reward/group_std_mean": 0.0034981849137693645,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6874786999542267e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6874786999542267e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.162190243601799,
"signal/frontier_coverage_0/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_0/group_std_mean": 0.2070352703332901,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020273780450224877,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020273780450224877,
"signal/frontier_coverage_1/centered_abs_mean": 0.162190243601799,
"signal/frontier_coverage_1/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_1/group_std_mean": 0.2070352703332901,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020273780450224877,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020273780450224877,
"signal/frontier_coverage_10/centered_abs_mean": 0.16074737310409545,
"signal/frontier_coverage_10/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_10/group_std_mean": 0.2051818400621414,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002009342284873128,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002009342284873128,
"signal/frontier_coverage_15/centered_abs_mean": 0.15363400876522065,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_15/group_std_mean": 0.19600152373313903,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019204251701012253,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019204251701012253,
"signal/frontier_coverage_20/centered_abs_mean": 0.09273725599050522,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88359375,
"signal/frontier_coverage_20/group_std_mean": 0.1185536801815033,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011592157417908311,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011592157417908311,
"signal/frontier_coverage_25/centered_abs_mean": 0.053829978406429294,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9140625,
"signal/frontier_coverage_25/group_std_mean": 0.06915899068117141,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006728747393935919,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006728747393935919,
"signal/frontier_coverage_5/centered_abs_mean": 0.16112555861473082,
"signal/frontier_coverage_5/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_5/group_std_mean": 0.2056680828332901,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00201406953856349,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00201406953856349,
"signal/frontier_ece_reward/centered_abs_mean": 0.00555073544383049,
"signal/frontier_ece_reward/group_bin_occupancy": 0.85859375,
"signal/frontier_ece_reward/group_std_mean": 0.007629283983260393,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005550735630095005,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005550735630095005,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2958798289299011,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37051703929901125,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029587984085083008,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029587984085083008,
"step": 275
},
{
"calibration/aurc": 0.3397437648873166,
"calibration/batch_distribution_entropy": 0.9796197507037563,
"calibration/batch_entropy_100bins": 0.951087428797685,
"calibration/batch_entropy_10bins": 0.9796197507037563,
"calibration/batch_entropy_50bins": 0.9718575724598498,
"calibration/batch_uniqueness": 0.965020751953125,
"calibration/buffer_distribution_entropy": 0.9988991354973707,
"calibration/buffer_entropy_100bins": 0.9878559564647965,
"calibration/buffer_entropy_10bins": 0.9988991354973707,
"calibration/buffer_entropy_50bins": 0.9948360874758867,
"calibration/confidence_entropy": 0.49223803297364005,
"calibration/coverage@0%": 0.027734375,
"calibration/coverage@1%": 0.027734375,
"calibration/coverage@10%": 0.08359375,
"calibration/coverage@15%": 0.121875,
"calibration/coverage@20%": 0.1984375,
"calibration/coverage@25%": 0.38515625,
"calibration/coverage@30%": 0.4953125,
"calibration/coverage@5%": 0.05703125,
"calibration/ece": 0.13973601884101564,
"calibration/mean_confidence": 0.4944046061589843,
"calibration/prompt_uniqueness": 0.854248046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 751.2,
"completions/max_terminated_length": 593.2,
"completions/mean_length": 174.5083984375,
"completions/mean_terminated_length": 174.11077270507812,
"completions/min_length": 81.2,
"completions/min_terminated_length": 81.2,
"epoch": 0.896,
"grad_norm": 0.0028364313766360283,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 941237165.0,
"reward": 0.935793137550354,
"reward_std": 0.07484155595302582,
"rewards/accuracy_reward": 0.54541015625,
"rewards/brier_reward": 0.8021585941314697,
"rewards/confidence_uniqueness_reward": 0.9650752425193787,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0026960095157846807,
"rewards/frontier_coverage_0": 0.11437956839799882,
"rewards/frontier_coverage_1": 0.11437956839799882,
"rewards/frontier_coverage_10": 0.11248253881931305,
"rewards/frontier_coverage_15": 0.10827968120574952,
"rewards/frontier_coverage_20": 0.07057406008243561,
"rewards/frontier_coverage_25": 0.05096975192427635,
"rewards/frontier_coverage_5": 0.11267611980438233,
"rewards/frontier_ece_reward": 0.0022103100549429656,
"rewards/frontier_entropy_batch_reward": -0.2222293496131897,
"signal/accuracy_reward/centered_abs_mean": 0.076336669921875,
"signal/accuracy_reward/group_bin_occupancy": 0.166015625,
"signal/accuracy_reward/group_std_mean": 0.10640045404434204,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0381683349609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0381683349609375,
"signal/advantage_abs_mean": 0.05686543136835098,
"signal/advantage_pre_scale_abs_mean": 0.05686543136835098,
"signal/advantage_pre_scale_std": 0.09319915175437928,
"signal/advantage_std": 0.09319915175437928,
"signal/brier_reward/centered_abs_mean": 0.1045459121465683,
"signal/brier_reward/group_bin_occupancy": 0.85078125,
"signal/brier_reward/group_std_mean": 0.13514408171176912,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010454590804874897,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010454590804874897,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012861154228448867,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.86171875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017182295396924018,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012861154275014997,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012861154275014997,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814434766769,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021444797981530427,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375,
"signal/frontier_aurc_reward/group_std_mean": 0.0034413845278322697,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6805997185874732e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6805997185874732e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.15001226961612701,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_0/group_std_mean": 0.19234943985939026,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001875153393484652,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001875153393484652,
"signal/frontier_coverage_1/centered_abs_mean": 0.15001226961612701,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_1/group_std_mean": 0.19234943985939026,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001875153393484652,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001875153393484652,
"signal/frontier_coverage_10/centered_abs_mean": 0.14806943833827974,
"signal/frontier_coverage_10/group_bin_occupancy": 0.867578125,
"signal/frontier_coverage_10/group_std_mean": 0.18986817002296447,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018508680164813994,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018508680164813994,
"signal/frontier_coverage_15/centered_abs_mean": 0.13712047040462494,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_15/group_std_mean": 0.17578884959220886,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017140058567747473,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017140058567747473,
"signal/frontier_coverage_20/centered_abs_mean": 0.08517580181360244,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88984375,
"signal/frontier_coverage_20/group_std_mean": 0.10908302515745164,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010646975366398691,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010646975366398691,
"signal/frontier_coverage_25/centered_abs_mean": 0.05108058974146843,
"signal/frontier_coverage_25/group_bin_occupancy": 0.910546875,
"signal/frontier_coverage_25/group_std_mean": 0.06581792756915092,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006385074113495648,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006385074113495648,
"signal/frontier_coverage_5/centered_abs_mean": 0.14839180409908295,
"signal/frontier_coverage_5/group_bin_occupancy": 0.867578125,
"signal/frontier_coverage_5/group_std_mean": 0.1902903586626053,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001854897616431117,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001854897616431117,
"signal/frontier_ece_reward/centered_abs_mean": 0.005655341129750013,
"signal/frontier_ece_reward/group_bin_occupancy": 0.843359375,
"signal/frontier_ece_reward/group_std_mean": 0.008241251390427352,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005655340966768563,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005655340966768563,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28620743155479433,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.707421875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35727530121803286,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02862074300646782,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02862074300646782,
"step": 280
},
{
"calibration/aurc": 0.34525926666499224,
"calibration/batch_distribution_entropy": 0.9790175360990441,
"calibration/batch_entropy_100bins": 0.9482314322026811,
"calibration/batch_entropy_10bins": 0.9790175360990441,
"calibration/batch_entropy_50bins": 0.9722607174037909,
"calibration/batch_uniqueness": 0.9663451804952441,
"calibration/buffer_distribution_entropy": 0.9988060335454433,
"calibration/buffer_entropy_100bins": 0.9873172216290733,
"calibration/buffer_entropy_10bins": 0.9988060335454433,
"calibration/buffer_entropy_50bins": 0.9947503690244401,
"calibration/confidence_entropy": 0.49496733877215143,
"calibration/coverage@0%": 0.020322437622309196,
"calibration/coverage@1%": 0.020322437622309196,
"calibration/coverage@10%": 0.10869159735812133,
"calibration/coverage@15%": 0.23450266022504893,
"calibration/coverage@20%": 0.3384211411448141,
"calibration/coverage@25%": 0.4357211656066536,
"calibration/coverage@30%": 0.4935504831213307,
"calibration/coverage@5%": 0.06336227984344422,
"calibration/ece": 0.1643798130198141,
"calibration/mean_confidence": 0.5136691958170254,
"calibration/prompt_uniqueness": 0.8665675708084027,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 917.2,
"completions/max_terminated_length": 593.2,
"completions/mean_length": 174.08798828125,
"completions/mean_terminated_length": 173.82220153808595,
"completions/min_length": 78.2,
"completions/min_terminated_length": 78.2,
"epoch": 0.912,
"grad_norm": 0.0006290775490924716,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 958071122.0,
"reward": 0.9379110097885132,
"reward_std": 0.07883523255586625,
"rewards/accuracy_reward": 0.55283203125,
"rewards/brier_reward": 0.8023535490036011,
"rewards/confidence_uniqueness_reward": 0.966447937488556,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002613516733981669,
"rewards/frontier_coverage_0": 0.09809458376839757,
"rewards/frontier_coverage_1": 0.09809458376839757,
"rewards/frontier_coverage_10": 0.09699874348007143,
"rewards/frontier_coverage_15": 0.09137383892666548,
"rewards/frontier_coverage_20": 0.06403161454945802,
"rewards/frontier_coverage_25": 0.05141137093305588,
"rewards/frontier_coverage_5": 0.09733490133658051,
"rewards/frontier_ece_reward": 0.0027125254506245255,
"rewards/frontier_entropy_batch_reward": -0.22992810904979705,
"signal/accuracy_reward/centered_abs_mean": 0.078399658203125,
"signal/accuracy_reward/group_bin_occupancy": 0.165234375,
"signal/accuracy_reward/group_std_mean": 0.10746027380228043,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0391998291015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0391998291015625,
"signal/advantage_abs_mean": 0.06045843511819839,
"signal/advantage_pre_scale_abs_mean": 0.06045843511819839,
"signal/advantage_pre_scale_std": 0.0974207267165184,
"signal/advantage_std": 0.0974207267165184,
"signal/brier_reward/centered_abs_mean": 0.10833604633808136,
"signal/brier_reward/group_bin_occupancy": 0.858203125,
"signal/brier_reward/group_std_mean": 0.13938207030296326,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010833604633808136,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010833604633808136,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012309185788035392,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016317157819867135,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012309186393395066,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012309186393395066,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002327501564286649,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.724609375,
"signal/frontier_aurc_reward/group_std_mean": 0.003778242599219084,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.909376962634269e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.909376962634269e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.14588625729084015,
"signal/frontier_coverage_0/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_0/group_std_mean": 0.1870903730392456,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018235782859846949,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018235782859846949,
"signal/frontier_coverage_1/centered_abs_mean": 0.14588625729084015,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_1/group_std_mean": 0.1870903730392456,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018235782859846949,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018235782859846949,
"signal/frontier_coverage_10/centered_abs_mean": 0.14415820091962814,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_10/group_std_mean": 0.18487805426120757,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001801977539435029,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001801977539435029,
"signal/frontier_coverage_15/centered_abs_mean": 0.13210653364658356,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8671875,
"signal/frontier_coverage_15/group_std_mean": 0.16943660378456116,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016513317124918104,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016513317124918104,
"signal/frontier_coverage_20/centered_abs_mean": 0.08194544017314911,
"signal/frontier_coverage_20/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_20/group_std_mean": 0.10515519231557846,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001024318009149283,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001024318009149283,
"signal/frontier_coverage_25/centered_abs_mean": 0.053516195714473726,
"signal/frontier_coverage_25/group_bin_occupancy": 0.92109375,
"signal/frontier_coverage_25/group_std_mean": 0.06882122904062271,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006689524743705988,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006689524743705988,
"signal/frontier_coverage_5/centered_abs_mean": 0.14475657939910888,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_5/group_std_mean": 0.18564701378345488,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018094572937116028,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018094572937116028,
"signal/frontier_ece_reward/centered_abs_mean": 0.00575404018163681,
"signal/frontier_ece_reward/group_bin_occupancy": 0.846875,
"signal/frontier_ece_reward/group_std_mean": 0.008270268887281417,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005754040437750518,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005754040437750518,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2830525994300842,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35513145923614503,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028305261209607125,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028305261209607125,
"step": 285
},
{
"calibration/aurc": 0.4236405616968358,
"calibration/batch_distribution_entropy": 0.9818024399334021,
"calibration/batch_entropy_100bins": 0.9521496186391095,
"calibration/batch_entropy_10bins": 0.9818024399334021,
"calibration/batch_entropy_50bins": 0.9757064291499068,
"calibration/batch_uniqueness": 0.9665863037109375,
"calibration/buffer_distribution_entropy": 0.9987764022593268,
"calibration/buffer_entropy_100bins": 0.9868602362750798,
"calibration/buffer_entropy_10bins": 0.9987764022593268,
"calibration/buffer_entropy_50bins": 0.9947180018038975,
"calibration/confidence_entropy": 0.5021496893703811,
"calibration/coverage@0%": 0.00234375,
"calibration/coverage@1%": 0.00234375,
"calibration/coverage@10%": 0.014453125,
"calibration/coverage@15%": 0.028515625,
"calibration/coverage@20%": 0.050390625,
"calibration/coverage@25%": 0.062109375,
"calibration/coverage@30%": 0.205078125,
"calibration/coverage@5%": 0.00234375,
"calibration/ece": 0.13476296875,
"calibration/mean_confidence": 0.506945,
"calibration/prompt_uniqueness": 0.864794921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 853.6,
"completions/max_terminated_length": 853.6,
"completions/mean_length": 172.291015625,
"completions/mean_terminated_length": 172.291015625,
"completions/min_length": 79.8,
"completions/min_terminated_length": 79.8,
"epoch": 0.928,
"grad_norm": 0.0007745189359411597,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 974862198.0,
"reward": 0.9235079884529114,
"reward_std": 0.07905451804399491,
"rewards/accuracy_reward": 0.5291015625,
"rewards/brier_reward": 0.7884802699089051,
"rewards/confidence_uniqueness_reward": 0.9643653869628906,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0033654853235930205,
"rewards/frontier_coverage_0": 0.11306976824998856,
"rewards/frontier_coverage_1": 0.11306976824998856,
"rewards/frontier_coverage_10": 0.1117068201303482,
"rewards/frontier_coverage_15": 0.1046798437833786,
"rewards/frontier_coverage_20": 0.07044542729854583,
"rewards/frontier_coverage_25": 0.05282995253801346,
"rewards/frontier_coverage_5": 0.1121548593044281,
"rewards/frontier_ece_reward": 0.002680363832041621,
"rewards/frontier_entropy_batch_reward": -0.2502776861190796,
"signal/accuracy_reward/centered_abs_mean": 0.07894287109375,
"signal/accuracy_reward/group_bin_occupancy": 0.16484375,
"signal/accuracy_reward/group_std_mean": 0.10790681540966034,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039471435546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039471435546875,
"signal/advantage_abs_mean": 0.06104508712887764,
"signal/advantage_pre_scale_abs_mean": 0.06104508712887764,
"signal/advantage_pre_scale_std": 0.09825572371482849,
"signal/advantage_std": 0.09825572371482849,
"signal/brier_reward/centered_abs_mean": 0.11147891283035279,
"signal/brier_reward/group_bin_occupancy": 0.84375,
"signal/brier_reward/group_std_mean": 0.14375323951244354,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011147891730070114,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011147891730070114,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013390088081359863,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87578125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016832890920341015,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013390088919550181,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013390088919550181,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029725271509960295,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.69453125,
"signal/frontier_aurc_reward/group_std_mean": 0.004932621866464615,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7156591133680195e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7156591133680195e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.146697798371315,
"signal/frontier_coverage_0/group_bin_occupancy": 0.873828125,
"signal/frontier_coverage_0/group_std_mean": 0.1891067087650299,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018337224144488573,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018337224144488573,
"signal/frontier_coverage_1/centered_abs_mean": 0.146697798371315,
"signal/frontier_coverage_1/group_bin_occupancy": 0.873828125,
"signal/frontier_coverage_1/group_std_mean": 0.1891067087650299,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018337224144488573,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018337224144488573,
"signal/frontier_coverage_10/centered_abs_mean": 0.14494749903678894,
"signal/frontier_coverage_10/group_bin_occupancy": 0.875,
"signal/frontier_coverage_10/group_std_mean": 0.1868603676557541,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001811843877658248,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001811843877658248,
"signal/frontier_coverage_15/centered_abs_mean": 0.13379482328891754,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_15/group_std_mean": 0.17280838787555694,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016724353889003396,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016724353889003396,
"signal/frontier_coverage_20/centered_abs_mean": 0.0833568200469017,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88984375,
"signal/frontier_coverage_20/group_std_mean": 0.10767639130353927,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010419602738693356,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010419602738693356,
"signal/frontier_coverage_25/centered_abs_mean": 0.05601404085755348,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9171875,
"signal/frontier_coverage_25/group_std_mean": 0.07191484123468399,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007001755409874022,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007001755409874022,
"signal/frontier_coverage_5/centered_abs_mean": 0.1458159238100052,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8734375,
"signal/frontier_coverage_5/group_std_mean": 0.1879925400018692,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018226990709081293,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018226990709081293,
"signal/frontier_ece_reward/centered_abs_mean": 0.005948805715888739,
"signal/frontier_ece_reward/group_bin_occupancy": 0.857421875,
"signal/frontier_ece_reward/group_std_mean": 0.008423867449164391,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005948805715888739,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005948805715888739,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2958779692649841,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3708716452121735,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029587796702980996,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029587796702980996,
"step": 290
},
{
"calibration/aurc": 0.23491377854573506,
"calibration/batch_distribution_entropy": 0.9814271545653476,
"calibration/batch_entropy_100bins": 0.9529836470801094,
"calibration/batch_entropy_10bins": 0.9814271545653476,
"calibration/batch_entropy_50bins": 0.9721020286908271,
"calibration/batch_uniqueness": 0.9633582912444499,
"calibration/buffer_distribution_entropy": 0.9988420216830608,
"calibration/buffer_entropy_100bins": 0.9863146646855876,
"calibration/buffer_entropy_10bins": 0.9988420216830608,
"calibration/buffer_entropy_50bins": 0.9948149826034557,
"calibration/confidence_entropy": 0.5013591675015299,
"calibration/coverage@0%": 0.03478167808219178,
"calibration/coverage@1%": 0.03478167808219178,
"calibration/coverage@10%": 0.24629250244618395,
"calibration/coverage@15%": 0.3827597541585127,
"calibration/coverage@20%": 0.4863770486790607,
"calibration/coverage@25%": 0.5665117416829746,
"calibration/coverage@30%": 0.6513538099315068,
"calibration/coverage@5%": 0.09147810665362036,
"calibration/ece": 0.1117759863472358,
"calibration/mean_confidence": 0.44929093207252935,
"calibration/prompt_uniqueness": 0.8603180280957335,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 853.6,
"completions/max_terminated_length": 632.8,
"completions/mean_length": 170.640625,
"completions/mean_terminated_length": 170.50676574707032,
"completions/min_length": 78.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.944,
"grad_norm": 0.0011778445914387703,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 991584982.0,
"reward": 0.9295665860176087,
"reward_std": 0.08568341732025146,
"rewards/accuracy_reward": 0.54228515625,
"rewards/brier_reward": 0.7837172031402588,
"rewards/confidence_uniqueness_reward": 0.9617600560188293,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0024823052808642387,
"rewards/frontier_coverage_0": 0.10404116213321686,
"rewards/frontier_coverage_1": 0.10404116213321686,
"rewards/frontier_coverage_10": 0.10361252054572105,
"rewards/frontier_coverage_15": 0.09421005547046661,
"rewards/frontier_coverage_20": 0.06851446852087975,
"rewards/frontier_coverage_25": 0.04667741134762764,
"rewards/frontier_coverage_5": 0.10377772152423859,
"rewards/frontier_ece_reward": 0.001886871492024511,
"rewards/frontier_entropy_batch_reward": -0.23994633555412292,
"signal/accuracy_reward/centered_abs_mean": 0.102056884765625,
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
"signal/accuracy_reward/group_std_mean": 0.13405922651290894,
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0510284423828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0510284423828125,
"signal/advantage_abs_mean": 0.06657596528530121,
"signal/advantage_pre_scale_abs_mean": 0.06657596528530121,
"signal/advantage_pre_scale_std": 0.10576380938291549,
"signal/advantage_std": 0.10576380938291549,
"signal/brier_reward/centered_abs_mean": 0.11052304357290268,
"signal/brier_reward/group_bin_occupancy": 0.847265625,
"signal/brier_reward/group_std_mean": 0.14318473637104034,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01105230450630188,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01105230450630188,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014990394562482834,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8671875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019529133662581445,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014990394469350577,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014990394469350577,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018933590967208148,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375,
"signal/frontier_aurc_reward/group_std_mean": 0.003138176305219531,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3666988272452728e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3666988272452728e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17136546075344086,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_0/group_std_mean": 0.21953192353248596,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002142068138346076,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002142068138346076,
"signal/frontier_coverage_1/centered_abs_mean": 0.17136546075344086,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_1/group_std_mean": 0.21953192353248596,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002142068138346076,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002142068138346076,
"signal/frontier_coverage_10/centered_abs_mean": 0.169466295838356,
"signal/frontier_coverage_10/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_10/group_std_mean": 0.217143777012825,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021183287259191274,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021183287259191274,
"signal/frontier_coverage_15/centered_abs_mean": 0.15599824488162994,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_15/group_std_mean": 0.19977592229843139,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019499780144542455,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019499780144542455,
"signal/frontier_coverage_20/centered_abs_mean": 0.09491551518440247,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_20/group_std_mean": 0.12218321114778519,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011864439584314823,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011864439584314823,
"signal/frontier_coverage_25/centered_abs_mean": 0.05510400533676148,
"signal/frontier_coverage_25/group_bin_occupancy": 0.90078125,
"signal/frontier_coverage_25/group_std_mean": 0.07128551304340362,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006888000760227441,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006888000760227441,
"signal/frontier_coverage_5/centered_abs_mean": 0.1706299215555191,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_5/group_std_mean": 0.2185954213142395,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00213287400547415,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00213287400547415,
"signal/frontier_ece_reward/centered_abs_mean": 0.005686651263386011,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8765625,
"signal/frontier_ece_reward/group_std_mean": 0.0076931707561016084,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005686651449650526,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005686651449650526,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28447132706642153,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73984375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3533455073833466,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02844713404774666,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02844713404774666,
"step": 295
},
{
"calibration/aurc": 0.34800169365994993,
"calibration/batch_distribution_entropy": 0.9812203477810147,
"calibration/batch_entropy_100bins": 0.9538863455386668,
"calibration/batch_entropy_10bins": 0.9812203477810147,
"calibration/batch_entropy_50bins": 0.9751755655335739,
"calibration/batch_uniqueness": 0.9656280517578125,
"calibration/buffer_distribution_entropy": 0.9989293186275472,
"calibration/buffer_entropy_100bins": 0.9856529750536842,
"calibration/buffer_entropy_10bins": 0.9989293186275472,
"calibration/buffer_entropy_50bins": 0.9948214021847231,
"calibration/confidence_entropy": 0.47452948094860803,
"calibration/coverage@0%": 0.0078125,
"calibration/coverage@1%": 0.0078125,
"calibration/coverage@10%": 0.08203125,
"calibration/coverage@15%": 0.226171875,
"calibration/coverage@20%": 0.2671875,
"calibration/coverage@25%": 0.3203125,
"calibration/coverage@30%": 0.391015625,
"calibration/coverage@5%": 0.010546875,
"calibration/ece": 0.16367422836523438,
"calibration/mean_confidence": 0.5006282908652343,
"calibration/prompt_uniqueness": 0.855078125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 744.0,
"completions/max_terminated_length": 550.0,
"completions/mean_length": 168.0302734375,
"completions/mean_terminated_length": 167.89690246582032,
"completions/min_length": 73.4,
"completions/min_terminated_length": 73.4,
"epoch": 0.96,
"grad_norm": 0.0010172611800953746,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 1008245932.0,
"reward": 0.9288432955741882,
"reward_std": 0.07382949590682983,
"rewards/accuracy_reward": 0.5306640625,
"rewards/brier_reward": 0.8049243211746215,
"rewards/confidence_uniqueness_reward": 0.9639307618141174,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0031038288958370685,
"rewards/frontier_coverage_0": 0.13451858162879943,
"rewards/frontier_coverage_1": 0.13451858162879943,
"rewards/frontier_coverage_10": 0.13342588990926743,
"rewards/frontier_coverage_15": 0.12053216546773911,
"rewards/frontier_coverage_20": 0.08194337785243988,
"rewards/frontier_coverage_25": 0.05771302729845047,
"rewards/frontier_coverage_5": 0.13427408933639526,
"rewards/frontier_ece_reward": 0.0028995629400014877,
"rewards/frontier_entropy_batch_reward": -0.23538158535957338,
"signal/accuracy_reward/centered_abs_mean": 0.07486572265625,
"signal/accuracy_reward/group_bin_occupancy": 0.16328125,
"signal/accuracy_reward/group_std_mean": 0.10253596603870392,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037432861328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037432861328125,
"signal/advantage_abs_mean": 0.056790337711572644,
"signal/advantage_pre_scale_abs_mean": 0.056790337711572644,
"signal/advantage_pre_scale_std": 0.09253572970628739,
"signal/advantage_std": 0.09253572970628739,
"signal/brier_reward/centered_abs_mean": 0.10669752955436707,
"signal/brier_reward/group_bin_occupancy": 0.825,
"signal/brier_reward/group_std_mean": 0.13931142389774323,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010669752955436707,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010669752955436707,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013811485469341278,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.859375,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01783113442361355,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001381148537620902,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001381148537620902,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002649222710169852,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.711328125,
"signal/frontier_aurc_reward/group_std_mean": 0.004259026004001498,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.311528380436357e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.311528380436357e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1507669657468796,
"signal/frontier_coverage_0/group_bin_occupancy": 0.848046875,
"signal/frontier_coverage_0/group_std_mean": 0.19579726755619048,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001884587062522769,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001884587062522769,
"signal/frontier_coverage_1/centered_abs_mean": 0.1507669657468796,
"signal/frontier_coverage_1/group_bin_occupancy": 0.848046875,
"signal/frontier_coverage_1/group_std_mean": 0.19579726755619048,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001884587062522769,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001884587062522769,
"signal/frontier_coverage_10/centered_abs_mean": 0.1489147961139679,
"signal/frontier_coverage_10/group_bin_occupancy": 0.848046875,
"signal/frontier_coverage_10/group_std_mean": 0.19344739615917206,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018614350352436303,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018614350352436303,
"signal/frontier_coverage_15/centered_abs_mean": 0.13582422733306884,
"signal/frontier_coverage_15/group_bin_occupancy": 0.847265625,
"signal/frontier_coverage_15/group_std_mean": 0.17663869857788086,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001697802823036909,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001697802823036909,
"signal/frontier_coverage_20/centered_abs_mean": 0.08488290458917618,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_20/group_std_mean": 0.11030421555042266,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010610363446176053,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010610363446176053,
"signal/frontier_coverage_25/centered_abs_mean": 0.05705864131450653,
"signal/frontier_coverage_25/group_bin_occupancy": 0.923828125,
"signal/frontier_coverage_25/group_std_mean": 0.07287163138389588,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007132330210879445,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007132330210879445,
"signal/frontier_coverage_5/centered_abs_mean": 0.15014611780643464,
"signal/frontier_coverage_5/group_bin_occupancy": 0.84765625,
"signal/frontier_coverage_5/group_std_mean": 0.1949920028448105,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00187682646792382,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00187682646792382,
"signal/frontier_ece_reward/centered_abs_mean": 0.0062250176444649695,
"signal/frontier_ece_reward/group_bin_occupancy": 0.844921875,
"signal/frontier_ece_reward/group_std_mean": 0.008787142857909203,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006225017714314163,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006225017714314163,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2811248004436493,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72109375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3526135325431824,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028112480789422988,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028112480789422988,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.4737306351081816,
"eval_calibration/batch_distribution_entropy": 0.9045024028911264,
"eval_calibration/batch_entropy_100bins": 0.6949329777280961,
"eval_calibration/batch_entropy_10bins": 0.9045024028911264,
"eval_calibration/batch_entropy_50bins": 0.7689095013084137,
"eval_calibration/batch_uniqueness": 0.8984375,
"eval_calibration/buffer_distribution_entropy": 0.9988096055976916,
"eval_calibration/buffer_entropy_100bins": 0.9849748824561931,
"eval_calibration/buffer_entropy_10bins": 0.9988096055976916,
"eval_calibration/buffer_entropy_50bins": 0.9946333864318222,
"eval_calibration/confidence_entropy": 0.4787274667660325,
"eval_calibration/coverage@0%": 0.046875,
"eval_calibration/coverage@1%": 0.046875,
"eval_calibration/coverage@10%": 0.046875,
"eval_calibration/coverage@15%": 0.046875,
"eval_calibration/coverage@20%": 0.0546875,
"eval_calibration/coverage@25%": 0.15625,
"eval_calibration/coverage@30%": 0.1640625,
"eval_calibration/coverage@5%": 0.046875,
"eval_calibration/ece": 0.21578124999999998,
"eval_calibration/mean_confidence": 0.47687499999999994,
"eval_calibration/prompt_uniqueness": 0.8984375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 422.0,
"eval_completions/max_terminated_length": 422.0,
"eval_completions/mean_length": 173.66567993164062,
"eval_completions/mean_terminated_length": 173.66567993164062,
"eval_completions/min_length": 87.0,
"eval_completions/min_terminated_length": 87.0,
"eval_loss": 0.0,
"eval_num_tokens": 1008245932.0,
"eval_reward": 0.7935565859079361,
"eval_reward_std": 0.2286548987030983,
"eval_rewards/accuracy_reward": 0.416015625,
"eval_rewards/brier_reward": 0.799243688583374,
"eval_rewards/confidence_uniqueness_reward": 0.91015625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.003638996509835124,
"eval_rewards/frontier_coverage_0": 0.19927702844142914,
"eval_rewards/frontier_coverage_1": 0.19927702844142914,
"eval_rewards/frontier_coverage_10": 0.19661162421107292,
"eval_rewards/frontier_coverage_15": 0.17882990464568138,
"eval_rewards/frontier_coverage_20": 0.10959831066429615,
"eval_rewards/frontier_coverage_25": 0.057190462946891785,
"eval_rewards/frontier_coverage_5": 0.19842347875237465,
"eval_rewards/frontier_ece_reward": 0.004141724260989577,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 21.0746,
"eval_samples_per_second": 23.725,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4656982421875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49005643278360367,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23284912109375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23284912109375,
"eval_signal/advantage_abs_mean": 0.21324742957949638,
"eval_signal/advantage_pre_scale_abs_mean": 0.21324742957949638,
"eval_signal/advantage_pre_scale_std": 0.22612683847546577,
"eval_signal/advantage_std": 0.22612683847546577,
"eval_signal/brier_reward/centered_abs_mean": 0.18935201317071915,
"eval_signal/brier_reward/group_bin_occupancy": 0.90625,
"eval_signal/brier_reward/group_std_mean": 0.24315428733825684,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01893520262092352,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01893520262092352,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0336761474609375,
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3203125,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.038827759213745594,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003367614757735282,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003367614757735282,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004632304655387998,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.703125,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008442466845735908,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.790380964754149e-05,
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.790380964754149e-05,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3491540476679802,
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_0/group_std_mean": 0.42496294528245926,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004364425898529589,
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004364425898529589,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3491540476679802,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_1/group_std_mean": 0.42496294528245926,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004364425898529589,
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004364425898529589,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3441574051976204,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4191160574555397,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0043019677978008986,
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0043019677978008986,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.31247151643037796,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.38298317044973373,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003905894060153514,
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003905894060153514,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.17517539486289024,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9140625,
"eval_signal/frontier_coverage_20/group_std_mean": 0.22201964259147644,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021896924590691924,
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021896924590691924,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09041432663798332,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_25/group_std_mean": 0.11431009136140347,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011301790946163237,
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011301790946163237,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.34793277829885483,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_5/group_std_mean": 0.42353837192058563,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004349160008132458,
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004349160008132458,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.007637793896719813,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.984375,
"eval_signal/frontier_ece_reward/group_std_mean": 0.009760213550180197,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007637794187758118,
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007637794187758118,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.19,
"step": 300
},
{
"calibration/aurc": 0.25243171988357693,
"calibration/batch_distribution_entropy": 0.9739037924606638,
"calibration/batch_entropy_100bins": 0.9439049482785213,
"calibration/batch_entropy_10bins": 0.9739037924606638,
"calibration/batch_entropy_50bins": 0.9694166202887426,
"calibration/batch_uniqueness": 0.966259765625,
"calibration/buffer_distribution_entropy": 0.99871030928833,
"calibration/buffer_entropy_100bins": 0.9844605941497815,
"calibration/buffer_entropy_10bins": 0.99871030928833,
"calibration/buffer_entropy_50bins": 0.9945275683891899,
"calibration/confidence_entropy": 0.5045930767500602,
"calibration/coverage@0%": 0.035546875,
"calibration/coverage@1%": 0.035546875,
"calibration/coverage@10%": 0.274609375,
"calibration/coverage@15%": 0.432421875,
"calibration/coverage@20%": 0.501953125,
"calibration/coverage@25%": 0.563671875,
"calibration/coverage@30%": 0.607421875,
"calibration/coverage@5%": 0.09296875,
"calibration/ece": 0.11844418789062501,
"calibration/mean_confidence": 0.522810125390625,
"calibration/prompt_uniqueness": 0.87255859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1185.4,
"completions/max_terminated_length": 604.8,
"completions/mean_length": 172.418359375,
"completions/mean_terminated_length": 171.7501678466797,
"completions/min_length": 79.6,
"completions/min_terminated_length": 79.6,
"epoch": 0.976,
"grad_norm": 0.001320485258474946,
"learning_rate": 1e-06,
"loss": 0.0019,
"num_tokens": 1024872616.0,
"reward": 0.9389370799064636,
"reward_std": 0.08163964003324509,
"rewards/accuracy_reward": 0.5552734375,
"rewards/brier_reward": 0.8005570650100708,
"rewards/confidence_uniqueness_reward": 0.9648543715476989,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.00284066004678607,
"rewards/frontier_coverage_0": 0.10627357796765864,
"rewards/frontier_coverage_1": 0.10627357796765864,
"rewards/frontier_coverage_10": 0.10575458101229743,
"rewards/frontier_coverage_15": 0.09721773080527782,
"rewards/frontier_coverage_20": 0.07075041458010674,
"rewards/frontier_coverage_25": 0.051219668984413144,
"rewards/frontier_coverage_5": 0.10608085230924189,
"rewards/frontier_ece_reward": 0.0025254017557017503,
"rewards/frontier_entropy_batch_reward": -0.23258313536643982,
"signal/accuracy_reward/centered_abs_mean": 0.08868408203125,
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
"signal/accuracy_reward/group_std_mean": 0.11722440421581268,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044342041015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044342041015625,
"signal/advantage_abs_mean": 0.06320648193359375,
"signal/advantage_pre_scale_abs_mean": 0.06320648193359375,
"signal/advantage_pre_scale_std": 0.10139600187540054,
"signal/advantage_std": 0.10139600187540054,
"signal/brier_reward/centered_abs_mean": 0.1057712584733963,
"signal/brier_reward/group_bin_occupancy": 0.85,
"signal/brier_reward/group_std_mean": 0.13689170479774476,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0105771254748106,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0105771254748106,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013170672208070755,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.855078125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01751931421458721,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001317067281343043,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001317067281343043,
"signal/format_reward/centered_abs_mean": 0.000909423828125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.002030306123197079,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004547119140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004547119140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002454556990414858,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70625,
"signal/frontier_aurc_reward/group_std_mean": 0.004182360181584954,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.068196165258996e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.068196165258996e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.151848965883255,
"signal/frontier_coverage_0/group_bin_occupancy": 0.868359375,
"signal/frontier_coverage_0/group_std_mean": 0.19617189466953278,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001898112171329558,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001898112171329558,
"signal/frontier_coverage_1/centered_abs_mean": 0.151848965883255,
"signal/frontier_coverage_1/group_bin_occupancy": 0.868359375,
"signal/frontier_coverage_1/group_std_mean": 0.19617189466953278,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001898112171329558,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001898112171329558,
"signal/frontier_coverage_10/centered_abs_mean": 0.1494060769677162,
"signal/frontier_coverage_10/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_10/group_std_mean": 0.1930826336145401,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018675760366022587,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018675760366022587,
"signal/frontier_coverage_15/centered_abs_mean": 0.1364602714776993,
"signal/frontier_coverage_15/group_bin_occupancy": 0.860546875,
"signal/frontier_coverage_15/group_std_mean": 0.1767397940158844,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017057533143088222,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017057533143088222,
"signal/frontier_coverage_20/centered_abs_mean": 0.08021349385380745,
"signal/frontier_coverage_20/group_bin_occupancy": 0.875390625,
"signal/frontier_coverage_20/group_std_mean": 0.10482732057571412,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010026687057688832,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010026687057688832,
"signal/frontier_coverage_25/centered_abs_mean": 0.0531280666589737,
"signal/frontier_coverage_25/group_bin_occupancy": 0.922265625,
"signal/frontier_coverage_25/group_std_mean": 0.06855799853801728,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006641008774749934,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006641008774749934,
"signal/frontier_coverage_5/centered_abs_mean": 0.1511296510696411,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_5/group_std_mean": 0.19528249204158782,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018891207640990616,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018891207640990616,
"signal/frontier_ece_reward/centered_abs_mean": 0.006002122722566128,
"signal/frontier_ece_reward/group_bin_occupancy": 0.861328125,
"signal/frontier_ece_reward/group_std_mean": 0.008093012310564519,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006002122885547578,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006002122885547578,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28755232095718386,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72265625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3637108564376831,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02875523306429386,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02875523306429386,
"step": 305
},
{
"calibration/aurc": 0.36275381546387697,
"calibration/batch_distribution_entropy": 0.9818363303055356,
"calibration/batch_entropy_100bins": 0.9516448277738757,
"calibration/batch_entropy_10bins": 0.9818363303055356,
"calibration/batch_entropy_50bins": 0.9751574070879462,
"calibration/batch_uniqueness": 0.965179443359375,
"calibration/buffer_distribution_entropy": 0.9985763272881746,
"calibration/buffer_entropy_100bins": 0.9834172411685647,
"calibration/buffer_entropy_10bins": 0.9985763272881746,
"calibration/buffer_entropy_50bins": 0.9944091208256998,
"calibration/confidence_entropy": 0.5024404900454493,
"calibration/coverage@0%": 0.00546875,
"calibration/coverage@1%": 0.00546875,
"calibration/coverage@10%": 0.019921875,
"calibration/coverage@15%": 0.05546875,
"calibration/coverage@20%": 0.108203125,
"calibration/coverage@25%": 0.294921875,
"calibration/coverage@30%": 0.43984375,
"calibration/coverage@5%": 0.00546875,
"calibration/ece": 0.11440790949765625,
"calibration/mean_confidence": 0.4765774462054687,
"calibration/prompt_uniqueness": 0.871875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 780.4,
"completions/max_terminated_length": 620.6,
"completions/mean_length": 166.96044921875,
"completions/mean_terminated_length": 166.827734375,
"completions/min_length": 77.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.992,
"grad_norm": 0.0008437388460151851,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 1041710771.0,
"reward": 0.9289904713630677,
"reward_std": 0.07655752152204513,
"rewards/accuracy_reward": 0.5341796875,
"rewards/brier_reward": 0.8037751078605652,
"rewards/confidence_uniqueness_reward": 0.9644991874694824,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002967359917238355,
"rewards/frontier_coverage_0": 0.12212227135896683,
"rewards/frontier_coverage_1": 0.12212227135896683,
"rewards/frontier_coverage_10": 0.12049156278371811,
"rewards/frontier_coverage_15": 0.11204217970371247,
"rewards/frontier_coverage_20": 0.07388233989477158,
"rewards/frontier_coverage_25": 0.055122246593236925,
"rewards/frontier_coverage_5": 0.1214935302734375,
"rewards/frontier_ece_reward": 0.0031089282827451827,
"rewards/frontier_entropy_batch_reward": -0.2424273669719696,
"signal/accuracy_reward/centered_abs_mean": 0.0775146484375,
"signal/accuracy_reward/group_bin_occupancy": 0.1625,
"signal/accuracy_reward/group_std_mean": 0.1029381737112999,
"signal/accuracy_reward/group_zero_std_frac": 0.7,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03875732421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03875732421875,
"signal/advantage_abs_mean": 0.05987899079918861,
"signal/advantage_pre_scale_abs_mean": 0.05987899079918861,
"signal/advantage_pre_scale_std": 0.09616845995187759,
"signal/advantage_std": 0.09616845995187759,
"signal/brier_reward/centered_abs_mean": 0.10155004113912583,
"signal/brier_reward/group_bin_occupancy": 0.844140625,
"signal/brier_reward/group_std_mean": 0.1313195899128914,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010155004076659679,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010155004076659679,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012845552526414394,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.878515625,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016458464972674846,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012845552759245039,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012845552759245039,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002551899803802371,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7140625,
"signal/frontier_aurc_reward/group_std_mean": 0.004267166648060083,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1898749148240314e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1898749148240314e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.14079180657863616,
"signal/frontier_coverage_0/group_bin_occupancy": 0.865234375,
"signal/frontier_coverage_0/group_std_mean": 0.18092852234840393,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001759897661395371,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001759897661395371,
"signal/frontier_coverage_1/centered_abs_mean": 0.14079180657863616,
"signal/frontier_coverage_1/group_bin_occupancy": 0.865234375,
"signal/frontier_coverage_1/group_std_mean": 0.18092852234840393,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001759897661395371,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001759897661395371,
"signal/frontier_coverage_10/centered_abs_mean": 0.13867213428020478,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_10/group_std_mean": 0.17820720970630646,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017334016738459468,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017334016738459468,
"signal/frontier_coverage_15/centered_abs_mean": 0.12698494046926498,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_15/group_std_mean": 0.1636158138513565,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015873117838054896,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015873117838054896,
"signal/frontier_coverage_20/centered_abs_mean": 0.07213507741689681,
"signal/frontier_coverage_20/group_bin_occupancy": 0.887890625,
"signal/frontier_coverage_20/group_std_mean": 0.09348605275154113,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009016884723678231,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009016884723678231,
"signal/frontier_coverage_25/centered_abs_mean": 0.05218314677476883,
"signal/frontier_coverage_25/group_bin_occupancy": 0.930078125,
"signal/frontier_coverage_25/group_std_mean": 0.0668656125664711,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006522893439978361,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006522893439978361,
"signal/frontier_coverage_5/centered_abs_mean": 0.13967403918504714,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_5/group_std_mean": 0.17946992814540863,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017459255410358309,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017459255410358309,
"signal/frontier_ece_reward/centered_abs_mean": 0.007050628308206797,
"signal/frontier_ece_reward/group_bin_occupancy": 0.812109375,
"signal/frontier_ece_reward/group_std_mean": 0.011934582144021988,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007050628308206796,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007050628308206796,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2894311249256134,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.711328125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36359102725982667,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028943114355206488,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028943114355206488,
"step": 310
},
{
"calibration/aurc": 0.28309316461584144,
"calibration/batch_distribution_entropy": 0.9562440520005147,
"calibration/batch_entropy_100bins": 0.9321305829954412,
"calibration/batch_entropy_10bins": 0.9562440520005147,
"calibration/batch_entropy_50bins": 0.9521604451830256,
"calibration/batch_uniqueness": 0.9661712646484375,
"calibration/buffer_distribution_entropy": 0.9986985889836495,
"calibration/buffer_entropy_100bins": 0.9827654244956366,
"calibration/buffer_entropy_10bins": 0.9986985889836495,
"calibration/buffer_entropy_50bins": 0.994458498244382,
"calibration/confidence_entropy": 0.4959383976784825,
"calibration/coverage@0%": 0.013671875,
"calibration/coverage@1%": 0.013671875,
"calibration/coverage@10%": 0.07421875,
"calibration/coverage@15%": 0.1591796875,
"calibration/coverage@20%": 0.3056640625,
"calibration/coverage@25%": 0.3720703125,
"calibration/coverage@30%": 0.59765625,
"calibration/coverage@5%": 0.013671875,
"calibration/ece": 0.14605446661523436,
"calibration/mean_confidence": 0.6033933338027344,
"calibration/prompt_uniqueness": 0.8629150390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 391.0,
"completions/max_terminated_length": 391.0,
"completions/mean_length": 162.71500396728516,
"completions/mean_terminated_length": 162.71500396728516,
"completions/min_length": 73.5,
"completions/min_terminated_length": 73.5,
"epoch": 0.9984,
"num_tokens": 1048384942.0,
"reward": 0.9344164729118347,
"reward_std": 0.08154623582959175,
"rewards/accuracy_reward": 0.558837890625,
"rewards/brier_reward": 0.7741440236568451,
"rewards/confidence_uniqueness_reward": 0.9663105010986328,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0030248835682868958,
"rewards/frontier_coverage_0": 0.06300802156329155,
"rewards/frontier_coverage_1": 0.06300802156329155,
"rewards/frontier_coverage_10": 0.062442582100629807,
"rewards/frontier_coverage_15": 0.055601296946406364,
"rewards/frontier_coverage_20": 0.03809378854930401,
"rewards/frontier_coverage_25": 0.03736502677202225,
"rewards/frontier_coverage_5": 0.06258464232087135,
"rewards/frontier_ece_reward": 0.0019081256468780339,
"rewards/frontier_entropy_batch_reward": -0.23977214097976685,
"signal/accuracy_reward/centered_abs_mean": 0.0806732177734375,
"signal/accuracy_reward/group_bin_occupancy": 0.1630859375,
"signal/accuracy_reward/group_std_mean": 0.1065446101129055,
"signal/accuracy_reward/group_zero_std_frac": 0.6953125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04033660888671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04033660888671875,
"signal/advantage_abs_mean": 0.06426878273487091,
"signal/advantage_pre_scale_abs_mean": 0.06426878273487091,
"signal/advantage_pre_scale_std": 0.10264430195093155,
"signal/advantage_std": 0.10264430195093155,
"signal/brier_reward/centered_abs_mean": 0.11038177087903023,
"signal/brier_reward/group_bin_occupancy": 0.8701171875,
"signal/brier_reward/group_std_mean": 0.14014140516519547,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011038177646696568,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011038177646696568,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011780858039855957,
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8720703125,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015235808677971363,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011780858621932566,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011780858621932566,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002542344154790044,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7236328125,
"signal/frontier_aurc_reward/group_std_mean": 0.0040895091369748116,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1779301025380846e-05,
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1779301025380846e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.13523942232131958,
"signal/frontier_coverage_0/group_bin_occupancy": 0.8798828125,
"signal/frontier_coverage_0/group_std_mean": 0.17214351892471313,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016904928488656878,
"signal/frontier_coverage_0/weight": 0.012500000186264515,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016904928488656878,
"signal/frontier_coverage_1/centered_abs_mean": 0.13523942232131958,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8798828125,
"signal/frontier_coverage_1/group_std_mean": 0.17214351892471313,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016904928488656878,
"signal/frontier_coverage_1/weight": 0.012500000186264515,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016904928488656878,
"signal/frontier_coverage_10/centered_abs_mean": 0.13328810781240463,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8740234375,
"signal/frontier_coverage_10/group_std_mean": 0.16966666281223297,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016661013942211866,
"signal/frontier_coverage_10/weight": 0.012500000186264515,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016661013942211866,
"signal/frontier_coverage_15/centered_abs_mean": 0.1213008388876915,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8662109375,
"signal/frontier_coverage_15/group_std_mean": 0.15479815006256104,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001516260497737676,
"signal/frontier_coverage_15/weight": 0.012500000186264515,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001516260497737676,
"signal/frontier_coverage_20/centered_abs_mean": 0.065843116492033,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8955078125,
"signal/frontier_coverage_20/group_std_mean": 0.08446861431002617,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008230389503296465,
"signal/frontier_coverage_20/weight": 0.012500000186264515,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008230389503296465,
"signal/frontier_coverage_25/centered_abs_mean": 0.04858388379216194,
"signal/frontier_coverage_25/group_bin_occupancy": 0.91015625,
"signal/frontier_coverage_25/group_std_mean": 0.06284799799323082,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006072985415812582,
"signal/frontier_coverage_25/weight": 0.012500000186264515,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006072985415812582,
"signal/frontier_coverage_5/centered_abs_mean": 0.1348385065793991,
"signal/frontier_coverage_5/group_bin_occupancy": 0.876953125,
"signal/frontier_coverage_5/group_std_mean": 0.17164986580610275,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001685481343884021,
"signal/frontier_coverage_5/weight": 0.012500000186264515,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001685481343884021,
"signal/frontier_ece_reward/centered_abs_mean": 0.006206750171259046,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8408203125,
"signal/frontier_ece_reward/group_std_mean": 0.009092409629374743,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006206750113051385,
"signal/frontier_ece_reward/weight": 0.10000000149011612,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006206750113051385,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30052025616168976,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7265625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37405627965927124,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03005202580243349,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03005202580243349,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.004423426932846315,
"train_runtime": 60056.3002,
"train_samples_per_second": 0.333,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1048384942,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}