11044 lines
696 KiB
JSON
11044 lines
696 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.6358064756244601,
|
|
"calibration/batch_distribution_entropy": 0.6431098183707868,
|
|
"calibration/batch_entropy_100bins": 0.48089187317226323,
|
|
"calibration/batch_entropy_10bins": 0.6431098183707868,
|
|
"calibration/batch_entropy_50bins": 0.5617938193030543,
|
|
"calibration/batch_uniqueness": 0.7219718974960545,
|
|
"calibration/confidence_entropy": 0.34696880251966167,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.49592420401806236,
|
|
"calibration/mean_confidence": 0.7925940600227801,
|
|
"calibration/prompt_uniqueness": 0.5942279192380695,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0345703125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1495.6,
|
|
"completions/mean_length": 270.69580078125,
|
|
"completions/mean_terminated_length": 225.39390869140624,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.06927429139614105,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0744,
|
|
"num_tokens": 17615957.0,
|
|
"reward": 0.533476448059082,
|
|
"reward_std": 0.4068940103054047,
|
|
"rewards/accuracy_reward": 0.219921875,
|
|
"rewards/brier_reward": 0.3760594606399536,
|
|
"rewards/confidence_uniqueness_reward": 0.48737336993217467,
|
|
"rewards/format_reward": 0.68427734375,
|
|
"rewards/frontier_aurc_reward": 0.30170206129550936,
|
|
"rewards/frontier_coverage_0": 0.30170206129550936,
|
|
"rewards/frontier_coverage_1": 0.30170206129550936,
|
|
"rewards/frontier_coverage_10": 0.30170206129550936,
|
|
"rewards/frontier_coverage_15": 0.30170206129550936,
|
|
"rewards/frontier_coverage_20": 0.30170206129550936,
|
|
"rewards/frontier_coverage_25": 0.30170206129550936,
|
|
"rewards/frontier_coverage_5": 0.30170206129550936,
|
|
"rewards/frontier_ece_reward": 0.30170206129550936,
|
|
"rewards/frontier_entropy_batch_reward": -0.6530686259269715,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.2394775390625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.21015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.28177876472473146,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.31875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11973876953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.11973876953125,
|
|
"signal/advantage_abs_mean": 0.34517702460289,
|
|
"signal/advantage_pre_scale_abs_mean": 0.34517702460289,
|
|
"signal/advantage_pre_scale_std": 0.4175687491893768,
|
|
"signal/advantage_std": 0.4175687491893768,
|
|
"signal/brier_reward/centered_abs_mean": 0.31782959699630736,
|
|
"signal/brier_reward/group_bin_occupancy": 0.747265625,
|
|
"signal/brier_reward/group_std_mean": 0.3630960941314697,
|
|
"signal/brier_reward/group_zero_std_frac": 0.003125,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031782958284020425,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.031782958284020425,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.29565892815589906,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.58359375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3465812742710114,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02956589199602604,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02956589199602604,
|
|
"signal/format_reward/centered_abs_mean": 0.399285888671875,
|
|
"signal/format_reward/group_bin_occupancy": 0.25,
|
|
"signal/format_reward/group_std_mean": 0.4503865897655487,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1996429443359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1996429443359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036364448722451927,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2909155905246735,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.65859375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.34205764532089233,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02909155897796154,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02909155897796154,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.424519544839859,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.314453125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.47118043899536133,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042451954632997516,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042451954632997516,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6633336947681945,
|
|
"calibration/batch_distribution_entropy": 0.6530785282030743,
|
|
"calibration/batch_entropy_100bins": 0.4852722322513416,
|
|
"calibration/batch_entropy_10bins": 0.6530785282030743,
|
|
"calibration/batch_entropy_50bins": 0.5661661966634106,
|
|
"calibration/batch_uniqueness": 0.7272441743970559,
|
|
"calibration/confidence_entropy": 0.3523645170870256,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.5257630611304681,
|
|
"calibration/mean_confidence": 0.7933062167842394,
|
|
"calibration/prompt_uniqueness": 0.6178022073084117,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1488.2,
|
|
"completions/mean_length": 261.59072265625,
|
|
"completions/mean_terminated_length": 211.972216796875,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.030047137290239334,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0764,
|
|
"num_tokens": 35394998.0,
|
|
"reward": 0.5451710224151611,
|
|
"reward_std": 0.38366069793701174,
|
|
"rewards/accuracy_reward": 0.20810546875,
|
|
"rewards/brier_reward": 0.3810562252998352,
|
|
"rewards/confidence_uniqueness_reward": 0.5187723219394684,
|
|
"rewards/format_reward": 0.7197265625,
|
|
"rewards/frontier_aurc_reward": 0.3000528335571289,
|
|
"rewards/frontier_coverage_0": 0.3000528335571289,
|
|
"rewards/frontier_coverage_1": 0.3000528335571289,
|
|
"rewards/frontier_coverage_10": 0.3000528335571289,
|
|
"rewards/frontier_coverage_15": 0.3000528335571289,
|
|
"rewards/frontier_coverage_20": 0.3000528335571289,
|
|
"rewards/frontier_coverage_25": 0.3000528335571289,
|
|
"rewards/frontier_coverage_5": 0.3000528335571289,
|
|
"rewards/frontier_ece_reward": 0.3000528335571289,
|
|
"rewards/frontier_entropy_batch_reward": -0.6873842597007751,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.216424560546875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.26217670142650606,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.34375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1082122802734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1082122802734375,
|
|
"signal/advantage_abs_mean": 0.3162668466567993,
|
|
"signal/advantage_pre_scale_abs_mean": 0.3162668466567993,
|
|
"signal/advantage_pre_scale_std": 0.3942062079906464,
|
|
"signal/advantage_std": 0.3942062079906464,
|
|
"signal/brier_reward/centered_abs_mean": 0.3037886917591095,
|
|
"signal/brier_reward/group_bin_occupancy": 0.775390625,
|
|
"signal/brier_reward/group_std_mean": 0.3516114354133606,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03037887029349804,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03037887029349804,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.28001424074172976,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.580859375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3388310194015503,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028001424670219422,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.028001424670219422,
|
|
"signal/format_reward/centered_abs_mean": 0.37132568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.249609375,
|
|
"signal/format_reward/group_std_mean": 0.4337587058544159,
|
|
"signal/format_reward/group_zero_std_frac": 0.003125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.185662841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.185662841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003451125044375658,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.27608999609947205,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.683203125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.33003708720207214,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.027609000355005263,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.027609000355005263,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39845545291900636,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3171875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4560263633728027,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03984554782509804,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03984554782509804,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6056235500133583,
|
|
"calibration/batch_distribution_entropy": 0.6372280867706955,
|
|
"calibration/batch_entropy_100bins": 0.48009095551927256,
|
|
"calibration/batch_entropy_10bins": 0.6372280867706955,
|
|
"calibration/batch_entropy_50bins": 0.5595683840082752,
|
|
"calibration/batch_uniqueness": 0.7113122520911674,
|
|
"calibration/buffer_distribution_entropy": 0.6568801862675887,
|
|
"calibration/buffer_entropy_100bins": 0.49209269792202925,
|
|
"calibration/buffer_entropy_10bins": 0.6568801862675887,
|
|
"calibration/buffer_entropy_50bins": 0.5730805301755447,
|
|
"calibration/confidence_entropy": 0.35123976578789656,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.47231641548769077,
|
|
"calibration/mean_confidence": 0.804845781710738,
|
|
"calibration/prompt_uniqueness": 0.6089974924774788,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.01650390625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1428.2,
|
|
"completions/mean_length": 200.514453125,
|
|
"completions/mean_terminated_length": 178.22185363769532,
|
|
"completions/min_length": 9.6,
|
|
"completions/min_terminated_length": 9.6,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.05051470175385475,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0486,
|
|
"num_tokens": 52497002.0,
|
|
"reward": 0.665993869304657,
|
|
"reward_std": 0.3059393674135208,
|
|
"rewards/accuracy_reward": 0.27236328125,
|
|
"rewards/brier_reward": 0.48226693272590637,
|
|
"rewards/confidence_uniqueness_reward": 0.6447442531585693,
|
|
"rewards/format_reward": 0.8810546875,
|
|
"rewards/frontier_aurc_reward": 0.29981047259643673,
|
|
"rewards/frontier_coverage_0": 0.3134632341563702,
|
|
"rewards/frontier_coverage_1": 0.3134632341563702,
|
|
"rewards/frontier_coverage_10": 0.3134632341563702,
|
|
"rewards/frontier_coverage_15": 0.3134632341563702,
|
|
"rewards/frontier_coverage_20": 0.3134632341563702,
|
|
"rewards/frontier_coverage_25": 0.3134632341563702,
|
|
"rewards/frontier_coverage_5": 0.3134632341563702,
|
|
"rewards/frontier_ece_reward": 0.2883337765932083,
|
|
"rewards/frontier_entropy_batch_reward": -0.8342528104782104,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.202545166015625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.207421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2523681789636612,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.340625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1012725830078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1012725830078125,
|
|
"signal/advantage_abs_mean": 0.23214915990829468,
|
|
"signal/advantage_pre_scale_abs_mean": 0.23214915990829468,
|
|
"signal/advantage_pre_scale_std": 0.31858267784118655,
|
|
"signal/advantage_std": 0.31858267784118655,
|
|
"signal/brier_reward/centered_abs_mean": 0.2716783404350281,
|
|
"signal/brier_reward/group_bin_occupancy": 0.81015625,
|
|
"signal/brier_reward/group_std_mean": 0.3263775706291199,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02716783434152603,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02716783434152603,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1999937564134598,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.597265625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.26394935250282286,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019999375380575658,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.019999375380575658,
|
|
"signal/format_reward/centered_abs_mean": 0.19697265625,
|
|
"signal/format_reward/group_bin_occupancy": 0.24140625,
|
|
"signal/format_reward/group_std_mean": 0.2975906074047089,
|
|
"signal/format_reward/group_zero_std_frac": 0.06875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.098486328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.098486328125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.215498910844326,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.2603446511551738,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0026937363953038586,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0026937363953038586,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.23299580514431,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2876336514949799,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.23299580514431,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2876336514949799,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.23299580514431,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2876336514949799,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.23299580514431,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2876336514949799,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.23299580514431,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2876336514949799,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.23299580514431,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2876336514949799,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.23299580514431,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2876336514949799,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029124475782737135,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.24313633441925048,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.712890625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.29327360093593596,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.024313633516430854,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.024313633516430854,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26260979771614074,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.334375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3697131097316742,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02626098096370697,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02626098096370697,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5229047865146916,
|
|
"calibration/batch_distribution_entropy": 0.6965577173291834,
|
|
"calibration/batch_entropy_100bins": 0.5146396132435052,
|
|
"calibration/batch_entropy_10bins": 0.6965577173291834,
|
|
"calibration/batch_entropy_50bins": 0.6023062687429694,
|
|
"calibration/batch_uniqueness": 0.7574193666928948,
|
|
"calibration/buffer_distribution_entropy": 0.657240172374993,
|
|
"calibration/buffer_entropy_100bins": 0.49448125756617145,
|
|
"calibration/buffer_entropy_10bins": 0.657240172374993,
|
|
"calibration/buffer_entropy_50bins": 0.5759482684510908,
|
|
"calibration/confidence_entropy": 0.37992020571579327,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3746292271322627,
|
|
"calibration/mean_confidence": 0.7828071817671975,
|
|
"calibration/prompt_uniqueness": 0.6806407808231312,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00361328125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1317.4,
|
|
"completions/mean_length": 137.51865234375,
|
|
"completions/mean_terminated_length": 132.4542221069336,
|
|
"completions/min_length": 24.4,
|
|
"completions/min_terminated_length": 24.4,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.008556111715734005,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0126,
|
|
"num_tokens": 68823593.0,
|
|
"reward": 0.7013731718063354,
|
|
"reward_std": 0.2047277569770813,
|
|
"rewards/accuracy_reward": 0.3408203125,
|
|
"rewards/brier_reward": 0.5739028096199036,
|
|
"rewards/confidence_uniqueness_reward": 0.7480879545211792,
|
|
"rewards/format_reward": 0.97705078125,
|
|
"rewards/frontier_aurc_reward": -0.006883773859590292,
|
|
"rewards/frontier_coverage_0": 0.06146884858608246,
|
|
"rewards/frontier_coverage_1": 0.06146884858608246,
|
|
"rewards/frontier_coverage_10": 0.06146884858608246,
|
|
"rewards/frontier_coverage_15": 0.06146884858608246,
|
|
"rewards/frontier_coverage_20": 0.06146884858608246,
|
|
"rewards/frontier_coverage_25": 0.06146884858608246,
|
|
"rewards/frontier_coverage_5": 0.06146884858608246,
|
|
"rewards/frontier_ece_reward": -0.050785575062036514,
|
|
"rewards/frontier_entropy_batch_reward": -0.8997539043426513,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.211279296875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20625,
|
|
"signal/accuracy_reward/group_std_mean": 0.25800455510616305,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1056396484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1056396484375,
|
|
"signal/advantage_abs_mean": 0.1570647269487381,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1570647269487381,
|
|
"signal/advantage_pre_scale_std": 0.22092486619949342,
|
|
"signal/advantage_std": 0.22092486619949342,
|
|
"signal/brier_reward/centered_abs_mean": 0.24524094462394713,
|
|
"signal/brier_reward/group_bin_occupancy": 0.844140625,
|
|
"signal/brier_reward/group_std_mean": 0.3004362642765045,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02452409528195858,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02452409528195858,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1221130445599556,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.6515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1624012291431427,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012211304530501366,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012211304530501366,
|
|
"signal/format_reward/centered_abs_mean": 0.043353271484375,
|
|
"signal/format_reward/group_bin_occupancy": 0.190234375,
|
|
"signal/format_reward/group_std_mean": 0.10569706857204438,
|
|
"signal/format_reward/group_zero_std_frac": 0.478125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0216766357421875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0216766357421875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.005248846765607595,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.007484708447009325,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.56105883535929e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.56105883535929e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.10489135384559631,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.695703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1639949709177017,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10489135384559631,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.695703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1639949709177017,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10489135384559631,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.695703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1639949709177017,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.10489135384559631,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.695703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1639949709177017,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10489135384559631,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.695703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1639949709177017,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10489135384559631,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.695703125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1639949709177017,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10489135384559631,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.695703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1639949709177017,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013111419510096311,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.14598130881786348,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.7453125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.17359468340873718,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014598131738603115,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014598131738603115,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.17333437800407409,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.3671875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.29973788261413575,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.06875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.017333437874913215,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.017333437874913215,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6283088834520059,
|
|
"calibration/batch_distribution_entropy": 0.8095181952478757,
|
|
"calibration/batch_entropy_100bins": 0.6000067690369304,
|
|
"calibration/batch_entropy_10bins": 0.8095181952478757,
|
|
"calibration/batch_entropy_50bins": 0.6862870972074454,
|
|
"calibration/batch_uniqueness": 0.831063874562125,
|
|
"calibration/buffer_distribution_entropy": 0.6868147973448078,
|
|
"calibration/buffer_entropy_100bins": 0.515115289448944,
|
|
"calibration/buffer_entropy_10bins": 0.6868147973448078,
|
|
"calibration/buffer_entropy_50bins": 0.5986524147896154,
|
|
"calibration/confidence_entropy": 0.45480330324323653,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3972159911131528,
|
|
"calibration/mean_confidence": 0.7111007678831154,
|
|
"calibration/prompt_uniqueness": 0.7651551820929442,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0013671875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 733.0,
|
|
"completions/mean_length": 118.575,
|
|
"completions/mean_terminated_length": 116.63536529541015,
|
|
"completions/min_length": 35.8,
|
|
"completions/min_terminated_length": 35.8,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.013943054713308811,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0027,
|
|
"num_tokens": 84970953.0,
|
|
"reward": 0.735591733455658,
|
|
"reward_std": 0.1745338499546051,
|
|
"rewards/accuracy_reward": 0.35390625,
|
|
"rewards/brier_reward": 0.6262224793434144,
|
|
"rewards/confidence_uniqueness_reward": 0.8280656814575196,
|
|
"rewards/format_reward": 0.9927734375,
|
|
"rewards/frontier_aurc_reward": -0.005787147860974074,
|
|
"rewards/frontier_coverage_0": 0.06969771385192872,
|
|
"rewards/frontier_coverage_1": 0.06969771385192872,
|
|
"rewards/frontier_coverage_10": 0.06969771385192872,
|
|
"rewards/frontier_coverage_15": 0.06969771385192872,
|
|
"rewards/frontier_coverage_20": 0.06969771385192872,
|
|
"rewards/frontier_coverage_25": 0.06969771385192872,
|
|
"rewards/frontier_coverage_5": 0.06969771385192872,
|
|
"rewards/frontier_ece_reward": -0.04099251367151737,
|
|
"rewards/frontier_entropy_batch_reward": -0.8510387659072876,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.19227294921875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.204296875,
|
|
"signal/accuracy_reward/group_std_mean": 0.24025425910949708,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.096136474609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.096136474609375,
|
|
"signal/advantage_abs_mean": 0.13553658425807952,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13553658425807952,
|
|
"signal/advantage_pre_scale_std": 0.19173393845558168,
|
|
"signal/advantage_std": 0.19173393845558168,
|
|
"signal/brier_reward/centered_abs_mean": 0.2213940680027008,
|
|
"signal/brier_reward/group_bin_occupancy": 0.88359375,
|
|
"signal/brier_reward/group_std_mean": 0.2728204667568207,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022139406949281692,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022139406949281692,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07314713597297669,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.72421875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10160589665174484,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0073147137649357315,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0073147137649357315,
|
|
"signal/format_reward/centered_abs_mean": 0.01385498046875,
|
|
"signal/format_reward/group_bin_occupancy": 0.14921875,
|
|
"signal/format_reward/group_std_mean": 0.0368439082056284,
|
|
"signal/format_reward/group_zero_std_frac": 0.80625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006927490234375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.006927490234375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0035542991012334824,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74453125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005189351085573435,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.442873832886107e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.442873832886107e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13423685133457183,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.77578125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1972368836402893,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13423685133457183,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.77578125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1972368836402893,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13423685133457183,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.77578125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1972368836402893,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13423685133457183,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.77578125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1972368836402893,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13423685133457183,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.77578125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1972368836402893,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13423685133457183,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.77578125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1972368836402893,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13423685133457183,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.77578125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1972368836402893,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016779606696218253,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.13046298176050186,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.812890625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.1628311574459076,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013046298176050186,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013046298176050186,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24598013758659362,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.45390625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38413644433021543,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02459801435470581,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02459801435470581,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.648785096123655,
|
|
"calibration/batch_distribution_entropy": 0.9380209005564305,
|
|
"calibration/batch_entropy_100bins": 0.7781107329568903,
|
|
"calibration/batch_entropy_10bins": 0.9380209005564305,
|
|
"calibration/batch_entropy_50bins": 0.850847448214451,
|
|
"calibration/batch_uniqueness": 0.9082612624262516,
|
|
"calibration/buffer_distribution_entropy": 0.7433120040362949,
|
|
"calibration/buffer_entropy_100bins": 0.564207235491536,
|
|
"calibration/buffer_entropy_10bins": 0.7433120040362949,
|
|
"calibration/buffer_entropy_50bins": 0.6484998987649865,
|
|
"calibration/confidence_entropy": 0.5158145753859638,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.29242145964555044,
|
|
"calibration/mean_confidence": 0.5788182740863752,
|
|
"calibration/prompt_uniqueness": 0.8489781667317032,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0017578125,
|
|
"completions/max_length": 1433.0,
|
|
"completions/max_terminated_length": 763.8,
|
|
"completions/mean_length": 115.12734375,
|
|
"completions/mean_terminated_length": 112.62236328125,
|
|
"completions/min_length": 37.8,
|
|
"completions/min_terminated_length": 37.8,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.0034602871164679527,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0056,
|
|
"num_tokens": 101194465.0,
|
|
"reward": 0.7778663992881775,
|
|
"reward_std": 0.16724584996700287,
|
|
"rewards/accuracy_reward": 0.35966796875,
|
|
"rewards/brier_reward": 0.6825961947441102,
|
|
"rewards/confidence_uniqueness_reward": 0.906465494632721,
|
|
"rewards/format_reward": 0.99453125,
|
|
"rewards/frontier_aurc_reward": -0.0052437069825828075,
|
|
"rewards/frontier_coverage_0": 0.10269922763109207,
|
|
"rewards/frontier_coverage_1": 0.10269922763109207,
|
|
"rewards/frontier_coverage_10": 0.10269922763109207,
|
|
"rewards/frontier_coverage_15": 0.10269922763109207,
|
|
"rewards/frontier_coverage_20": 0.10269922763109207,
|
|
"rewards/frontier_coverage_25": 0.10269922763109207,
|
|
"rewards/frontier_coverage_5": 0.10269922763109207,
|
|
"rewards/frontier_ece_reward": -0.03292221836745739,
|
|
"rewards/frontier_entropy_batch_reward": -0.6376779556274415,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.185467529296875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.23311618864536285,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.38125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0927337646484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0927337646484375,
|
|
"signal/advantage_abs_mean": 0.13114093095064164,
|
|
"signal/advantage_pre_scale_abs_mean": 0.13114093095064164,
|
|
"signal/advantage_pre_scale_std": 0.1809857577085495,
|
|
"signal/advantage_std": 0.1809857577085495,
|
|
"signal/brier_reward/centered_abs_mean": 0.22195914387702942,
|
|
"signal/brier_reward/group_bin_occupancy": 0.914453125,
|
|
"signal/brier_reward/group_std_mean": 0.2706751048564911,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022195914760231972,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022195914760231972,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05482520312070847,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.744921875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0789007768034935,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005482520535588264,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005482520535588264,
|
|
"signal/format_reward/centered_abs_mean": 0.01048583984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.144140625,
|
|
"signal/format_reward/group_std_mean": 0.02846333533525467,
|
|
"signal/format_reward/group_zero_std_frac": 0.846875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005242919921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.005242919921875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026835352182388306,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.751953125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004022491350769997,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3544190227985385e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3544190227985385e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20481350123882294,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88515625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2719453454017639,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20481350123882294,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88515625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2719453454017639,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20481350123882294,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88515625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2719453454017639,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20481350123882294,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88515625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2719453454017639,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20481350123882294,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88515625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2719453454017639,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20481350123882294,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.88515625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2719453454017639,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20481350123882294,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88515625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2719453454017639,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025601688772439956,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.12130876779556274,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83046875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.1663817882537842,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.012130877003073692,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.012130877003073692,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44812787771224977,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.614453125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5313847541809082,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0448127880692482,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0448127880692482,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5053299592442537,
|
|
"calibration/batch_distribution_entropy": 0.9488818143342655,
|
|
"calibration/batch_entropy_100bins": 0.9306714343967982,
|
|
"calibration/batch_entropy_10bins": 0.9488818143342655,
|
|
"calibration/batch_entropy_50bins": 0.9460526008785772,
|
|
"calibration/batch_uniqueness": 0.9490444932260778,
|
|
"calibration/buffer_distribution_entropy": 0.8224099484197692,
|
|
"calibration/buffer_entropy_100bins": 0.6592172461309497,
|
|
"calibration/buffer_entropy_10bins": 0.8224099484197692,
|
|
"calibration/buffer_entropy_50bins": 0.7358472349793205,
|
|
"calibration/confidence_entropy": 0.5183275438944214,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0027450980392156863,
|
|
"calibration/coverage@20%": 0.0027450980392156863,
|
|
"calibration/coverage@25%": 0.008627450980392156,
|
|
"calibration/coverage@30%": 0.020375273397030044,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.1796229260918823,
|
|
"calibration/mean_confidence": 0.40559769162291126,
|
|
"calibration/prompt_uniqueness": 0.8886733536752803,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0009765625,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 576.4,
|
|
"completions/mean_length": 106.2763671875,
|
|
"completions/mean_terminated_length": 104.87900238037109,
|
|
"completions/min_length": 36.0,
|
|
"completions/min_terminated_length": 36.0,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.0038092397153377533,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0021,
|
|
"num_tokens": 117392207.0,
|
|
"reward": 0.8370797395706177,
|
|
"reward_std": 0.13981394171714784,
|
|
"rewards/accuracy_reward": 0.39765625,
|
|
"rewards/brier_reward": 0.7146290063858032,
|
|
"rewards/confidence_uniqueness_reward": 0.9453599929809571,
|
|
"rewards/format_reward": 0.99765625,
|
|
"rewards/frontier_aurc_reward": -0.004691840149462223,
|
|
"rewards/frontier_coverage_0": 0.11487203687429429,
|
|
"rewards/frontier_coverage_1": 0.11487203687429429,
|
|
"rewards/frontier_coverage_10": 0.11487203687429429,
|
|
"rewards/frontier_coverage_15": 0.11487203687429429,
|
|
"rewards/frontier_coverage_20": 0.11487203687429429,
|
|
"rewards/frontier_coverage_25": 0.11487203687429429,
|
|
"rewards/frontier_coverage_5": 0.11487203687429429,
|
|
"rewards/frontier_ece_reward": -0.006113046361133456,
|
|
"rewards/frontier_entropy_batch_reward": -0.35956743359565735,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1920654296875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.20625,
|
|
"signal/accuracy_reward/group_std_mean": 0.24405551552772523,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.35,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.09603271484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09603271484375,
|
|
"signal/advantage_abs_mean": 0.10965722203254699,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10965722203254699,
|
|
"signal/advantage_pre_scale_std": 0.15251348316669464,
|
|
"signal/advantage_std": 0.15251348316669464,
|
|
"signal/brier_reward/centered_abs_mean": 0.20627183914184571,
|
|
"signal/brier_reward/group_bin_occupancy": 0.90703125,
|
|
"signal/brier_reward/group_std_mean": 0.25678886771202086,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020627183839678764,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020627183839678764,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026137924194335936,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8671875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03944253027439117,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002613792475312948,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002613792475312948,
|
|
"signal/format_reward/centered_abs_mean": 0.00452880859375,
|
|
"signal/format_reward/group_bin_occupancy": 0.133984375,
|
|
"signal/format_reward/group_std_mean": 0.012921943515539169,
|
|
"signal/format_reward/group_zero_std_frac": 0.928125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002264404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.002264404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016326952259987592,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.719921875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026589396875351667,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0408690397744066e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0408690397744066e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2962383508682251,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.94453125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3698026418685913,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2962383508682251,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.94453125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3698026418685913,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2962383508682251,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.94453125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3698026418685913,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2962383508682251,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.94453125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3698026418685913,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2962383508682251,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.94453125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3698026418685913,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2962383508682251,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.94453125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3698026418685913,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2962383508682251,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.94453125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3698026418685913,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003702979441732168,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06853184774518013,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.810546875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.10600927323102952,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006853185035288334,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006853185035288334,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.421990305185318,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76328125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.48495404720306395,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.04219903200864792,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04219903200864792,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5568013985695004,
|
|
"calibration/batch_distribution_entropy": 0.9200766038929988,
|
|
"calibration/batch_entropy_100bins": 0.9329721377257479,
|
|
"calibration/batch_entropy_10bins": 0.9200766038929988,
|
|
"calibration/batch_entropy_50bins": 0.9367769710746388,
|
|
"calibration/batch_uniqueness": 0.942095789057279,
|
|
"calibration/buffer_distribution_entropy": 0.8899342638622837,
|
|
"calibration/buffer_entropy_100bins": 0.7478820117698411,
|
|
"calibration/buffer_entropy_10bins": 0.8899342638622837,
|
|
"calibration/buffer_entropy_50bins": 0.8124210230759926,
|
|
"calibration/confidence_entropy": 0.4988997779411394,
|
|
"calibration/coverage@0%": 0.001175703157975519,
|
|
"calibration/coverage@1%": 0.001175703157975519,
|
|
"calibration/coverage@10%": 0.004705114922681402,
|
|
"calibration/coverage@15%": 0.004705114922681402,
|
|
"calibration/coverage@20%": 0.0074502129618970875,
|
|
"calibration/coverage@25%": 0.008626683550132382,
|
|
"calibration/coverage@30%": 0.018822761981504933,
|
|
"calibration/coverage@5%": 0.001175703157975519,
|
|
"calibration/ece": 0.18915972842799716,
|
|
"calibration/mean_confidence": 0.34738454918178696,
|
|
"calibration/prompt_uniqueness": 0.8832753255885797,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00126953125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 759.6,
|
|
"completions/mean_length": 107.24765625,
|
|
"completions/mean_terminated_length": 105.431201171875,
|
|
"completions/min_length": 41.0,
|
|
"completions/min_terminated_length": 41.0,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.0016961501678451896,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0051,
|
|
"num_tokens": 133407095.0,
|
|
"reward": 0.8337414741516114,
|
|
"reward_std": 0.12419430166482925,
|
|
"rewards/accuracy_reward": 0.39267578125,
|
|
"rewards/brier_reward": 0.7212756514549256,
|
|
"rewards/confidence_uniqueness_reward": 0.9402257800102234,
|
|
"rewards/format_reward": 0.99736328125,
|
|
"rewards/frontier_aurc_reward": -0.004438658151775599,
|
|
"rewards/frontier_coverage_0": 0.12889230251312256,
|
|
"rewards/frontier_coverage_1": 0.12889230251312256,
|
|
"rewards/frontier_coverage_10": 0.12889230251312256,
|
|
"rewards/frontier_coverage_15": 0.12889230251312256,
|
|
"rewards/frontier_coverage_20": 0.12889230251312256,
|
|
"rewards/frontier_coverage_25": 0.12889230251312256,
|
|
"rewards/frontier_coverage_5": 0.12889230251312256,
|
|
"rewards/frontier_ece_reward": 0.0015361378580564633,
|
|
"rewards/frontier_entropy_batch_reward": -0.3880440592765808,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.167462158203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.197265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.21296925246715545,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.421875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0837310791015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0837310791015625,
|
|
"signal/advantage_abs_mean": 0.09632081687450408,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09632081687450408,
|
|
"signal/advantage_pre_scale_std": 0.13876967430114745,
|
|
"signal/advantage_std": 0.13876967430114745,
|
|
"signal/brier_reward/centered_abs_mean": 0.19647954106330873,
|
|
"signal/brier_reward/group_bin_occupancy": 0.883984375,
|
|
"signal/brier_reward/group_std_mean": 0.248234623670578,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019647954031825066,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019647954031825066,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024874152988195418,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.905859375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03841259628534317,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0024874153779819606,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024874153779819606,
|
|
"signal/format_reward/centered_abs_mean": 0.005108642578125,
|
|
"signal/format_reward/group_bin_occupancy": 0.135546875,
|
|
"signal/format_reward/group_std_mean": 0.014915533270686865,
|
|
"signal/format_reward/group_zero_std_frac": 0.915625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025543212890625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0025543212890625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013979610754176973,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.748046875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022263232618570327,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7474513515480795e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7474513515480795e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.3031778931617737,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3761015355587006,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.3031778931617737,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3761015355587006,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.3031778931617737,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3761015355587006,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.3031778931617737,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3761015355587006,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.3031778931617737,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3761015355587006,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.3031778931617737,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3761015355587006,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.3031778931617737,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.9390625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3761015355587006,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037897238973528145,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.05225505083799362,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.816796875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0818573072552681,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00522550530731678,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00522550530731678,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.420942884683609,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.778515625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.483720475435257,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.042094288021326066,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042094288021326066,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4175940179177261,
|
|
"calibration/batch_distribution_entropy": 0.979250693438256,
|
|
"calibration/batch_entropy_100bins": 0.9653378707811878,
|
|
"calibration/batch_entropy_10bins": 0.979250693438256,
|
|
"calibration/batch_entropy_50bins": 0.9767701739682157,
|
|
"calibration/batch_uniqueness": 0.9545605405154486,
|
|
"calibration/buffer_distribution_entropy": 0.9248228875256566,
|
|
"calibration/buffer_entropy_100bins": 0.8047559482027328,
|
|
"calibration/buffer_entropy_10bins": 0.9248228875256566,
|
|
"calibration/buffer_entropy_50bins": 0.8588074955955491,
|
|
"calibration/confidence_entropy": 0.5313858867098616,
|
|
"calibration/coverage@0%": 0.0011734038649706458,
|
|
"calibration/coverage@1%": 0.0011734038649706458,
|
|
"calibration/coverage@10%": 0.012892153864970645,
|
|
"calibration/coverage@15%": 0.014454653864970645,
|
|
"calibration/coverage@20%": 0.07773590386497065,
|
|
"calibration/coverage@25%": 0.20117340386497062,
|
|
"calibration/coverage@30%": 0.20430451932485322,
|
|
"calibration/coverage@5%": 0.0011734038649706458,
|
|
"calibration/ece": 0.24015715380201258,
|
|
"calibration/mean_confidence": 0.4763771020182851,
|
|
"calibration/prompt_uniqueness": 0.894472107505203,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1201.6,
|
|
"completions/max_terminated_length": 711.0,
|
|
"completions/mean_length": 107.3626953125,
|
|
"completions/mean_terminated_length": 106.80433654785156,
|
|
"completions/min_length": 41.8,
|
|
"completions/min_terminated_length": 41.8,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.002021110150963068,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 149456921.0,
|
|
"reward": 0.8988601326942444,
|
|
"reward_std": 0.13267979323863982,
|
|
"rewards/accuracy_reward": 0.50439453125,
|
|
"rewards/brier_reward": 0.7083608627319335,
|
|
"rewards/confidence_uniqueness_reward": 0.9530706882476807,
|
|
"rewards/format_reward": 0.9990234375,
|
|
"rewards/frontier_aurc_reward": -0.003968859650194645,
|
|
"rewards/frontier_coverage_0": 0.02937074126675725,
|
|
"rewards/frontier_coverage_1": 0.02937074126675725,
|
|
"rewards/frontier_coverage_10": 0.02937074126675725,
|
|
"rewards/frontier_coverage_15": 0.02937074126675725,
|
|
"rewards/frontier_coverage_20": 0.02937074126675725,
|
|
"rewards/frontier_coverage_25": 0.02937074126675725,
|
|
"rewards/frontier_coverage_5": 0.02937074126675725,
|
|
"rewards/frontier_ece_reward": 0.006849961820989847,
|
|
"rewards/frontier_entropy_batch_reward": -0.22197339236736296,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.164532470703125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.202734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.21746462881565093,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.378125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0822662353515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0822662353515625,
|
|
"signal/advantage_abs_mean": 0.1036305546760559,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1036305546760559,
|
|
"signal/advantage_pre_scale_std": 0.14524299502372742,
|
|
"signal/advantage_std": 0.14524299502372742,
|
|
"signal/brier_reward/centered_abs_mean": 0.20842026472091674,
|
|
"signal/brier_reward/group_bin_occupancy": 0.92578125,
|
|
"signal/brier_reward/group_std_mean": 0.2574777901172638,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02084202691912651,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02084202691912651,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015083288960158824,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.941015625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02174595184624195,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001508328877389431,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001508328877389431,
|
|
"signal/format_reward/centered_abs_mean": 0.00189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.12890625,
|
|
"signal/format_reward/group_std_mean": 0.005524271540343762,
|
|
"signal/format_reward/group_zero_std_frac": 0.96875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020871605491265656,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.786328125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003077511163428426,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6089507082360795e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6089507082360795e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.261399644613266,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3283530294895172,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.261399644613266,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3283530294895172,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.261399644613266,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3283530294895172,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.261399644613266,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3283530294895172,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.261399644613266,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3283530294895172,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.261399644613266,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3283530294895172,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.261399644613266,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.93359375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3283530294895172,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032674957066774367,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0648654729127884,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.096609228849411,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006486547738313675,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006486547738313675,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3109690427780151,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76015625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.38899595737457277,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03109690472483635,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03109690472483635,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.45887670957463256,
|
|
"calibration/batch_distribution_entropy": 0.986413046557384,
|
|
"calibration/batch_entropy_100bins": 0.9701950864389216,
|
|
"calibration/batch_entropy_10bins": 0.986413046557384,
|
|
"calibration/batch_entropy_50bins": 0.9786926740327588,
|
|
"calibration/batch_uniqueness": 0.9566806171713684,
|
|
"calibration/buffer_distribution_entropy": 0.94049635911715,
|
|
"calibration/buffer_entropy_100bins": 0.8437552164763602,
|
|
"calibration/buffer_entropy_10bins": 0.94049635911715,
|
|
"calibration/buffer_entropy_50bins": 0.8891188471793313,
|
|
"calibration/confidence_entropy": 0.5183146562190272,
|
|
"calibration/coverage@0%": 0.000392156862745098,
|
|
"calibration/coverage@1%": 0.000392156862745098,
|
|
"calibration/coverage@10%": 0.000392156862745098,
|
|
"calibration/coverage@15%": 0.000392156862745098,
|
|
"calibration/coverage@20%": 0.005079656862745098,
|
|
"calibration/coverage@25%": 0.017189031862745098,
|
|
"calibration/coverage@30%": 0.025001531862745098,
|
|
"calibration/coverage@5%": 0.000392156862745098,
|
|
"calibration/ece": 0.16821106842628572,
|
|
"calibration/mean_confidence": 0.5462577650957192,
|
|
"calibration/prompt_uniqueness": 0.895401010503382,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1131.4,
|
|
"completions/max_terminated_length": 745.4,
|
|
"completions/mean_length": 116.3083984375,
|
|
"completions/mean_terminated_length": 115.8933090209961,
|
|
"completions/min_length": 46.4,
|
|
"completions/min_terminated_length": 46.4,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0017517129890620708,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 165668847.0,
|
|
"reward": 0.8806891083717346,
|
|
"reward_std": 0.1352734684944153,
|
|
"rewards/accuracy_reward": 0.45361328125,
|
|
"rewards/brier_reward": 0.7112634301185607,
|
|
"rewards/confidence_uniqueness_reward": 0.9563660025596619,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0043065791018307206,
|
|
"rewards/frontier_coverage_0": 0.06639667674899101,
|
|
"rewards/frontier_coverage_1": 0.06639667674899101,
|
|
"rewards/frontier_coverage_10": 0.06639667674899101,
|
|
"rewards/frontier_coverage_15": 0.06639667674899101,
|
|
"rewards/frontier_coverage_20": 0.06639667674899101,
|
|
"rewards/frontier_coverage_25": 0.06639667674899101,
|
|
"rewards/frontier_coverage_5": 0.06639667674899101,
|
|
"rewards/frontier_ece_reward": 0.005921919783577323,
|
|
"rewards/frontier_entropy_batch_reward": -0.18935585916042327,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.159417724609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.20114850401878356,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0797088623046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0797088623046875,
|
|
"signal/advantage_abs_mean": 0.10809851884841919,
|
|
"signal/advantage_pre_scale_abs_mean": 0.10809851884841919,
|
|
"signal/advantage_pre_scale_std": 0.1511505126953125,
|
|
"signal/advantage_std": 0.1511505126953125,
|
|
"signal/brier_reward/centered_abs_mean": 0.21221804320812226,
|
|
"signal/brier_reward/group_bin_occupancy": 0.920703125,
|
|
"signal/brier_reward/group_std_mean": 0.2602735161781311,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02122180461883545,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02122180461883545,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01266609001904726,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.947265625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017678024619817732,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012666089925915003,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012666089925915003,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002775211539119482,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.80078125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0039797000586986545,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4690145548665895e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4690145548665895e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2263072282075882,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.920703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2919350802898407,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2263072282075882,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.920703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2919350802898407,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2263072282075882,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.920703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2919350802898407,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2263072282075882,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.920703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2919350802898407,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2263072282075882,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.920703125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2919350802898407,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2263072282075882,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.920703125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2919350802898407,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2263072282075882,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.920703125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2919350802898407,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028288405854254963,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06984314173460007,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89296875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0957074835896492,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006984313949942589,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006984313949942589,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2799877405166626,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.755078125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35995004177093504,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027998774126172066,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027998774126172066,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.6514437234622675,
|
|
"eval_calibration/batch_distribution_entropy": 0.9300585720206449,
|
|
"eval_calibration/batch_entropy_100bins": 0.6905406002692105,
|
|
"eval_calibration/batch_entropy_10bins": 0.9300585720206449,
|
|
"eval_calibration/batch_entropy_50bins": 0.7765363996158673,
|
|
"eval_calibration/batch_uniqueness": 0.8994140625,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9462761211614454,
|
|
"eval_calibration/buffer_entropy_100bins": 0.8623138320227769,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9462761211614454,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9029670207624898,
|
|
"eval_calibration/confidence_entropy": 0.5075045392400461,
|
|
"eval_calibration/coverage@0%": 0.0078125,
|
|
"eval_calibration/coverage@1%": 0.0078125,
|
|
"eval_calibration/coverage@10%": 0.0078125,
|
|
"eval_calibration/coverage@15%": 0.0078125,
|
|
"eval_calibration/coverage@20%": 0.0078125,
|
|
"eval_calibration/coverage@25%": 0.0078125,
|
|
"eval_calibration/coverage@30%": 0.0078125,
|
|
"eval_calibration/coverage@5%": 0.0078125,
|
|
"eval_calibration/ece": 0.310032909152462,
|
|
"eval_calibration/mean_confidence": 0.5270542874048246,
|
|
"eval_calibration/prompt_uniqueness": 0.8994140625,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 294.75,
|
|
"eval_completions/max_terminated_length": 294.75,
|
|
"eval_completions/mean_length": 126.42483901977539,
|
|
"eval_completions/mean_terminated_length": 126.42483901977539,
|
|
"eval_completions/min_length": 61.5,
|
|
"eval_completions/min_terminated_length": 61.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 165668847.0,
|
|
"eval_reward": 0.7603507339954376,
|
|
"eval_reward_std": 0.23446262627840042,
|
|
"eval_rewards/accuracy_reward": 0.3828125,
|
|
"eval_rewards/brier_reward": 0.6980591118335724,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.898681640625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.004815749707631767,
|
|
"eval_rewards/frontier_coverage_0": 0.10749666392803192,
|
|
"eval_rewards/frontier_coverage_1": 0.10749666392803192,
|
|
"eval_rewards/frontier_coverage_10": 0.10749666392803192,
|
|
"eval_rewards/frontier_coverage_15": 0.10749666392803192,
|
|
"eval_rewards/frontier_coverage_20": 0.10749666392803192,
|
|
"eval_rewards/frontier_coverage_25": 0.10749666392803192,
|
|
"eval_rewards/frontier_coverage_5": 0.10749666392803192,
|
|
"eval_rewards/frontier_ece_reward": -0.0007533840253017843,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 17.0809,
|
|
"eval_samples_per_second": 29.272,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.45849609375,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.485101580619812,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.229248046875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.229248046875,
|
|
"eval_signal/advantage_abs_mean": 0.21446801349520683,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21446801349520683,
|
|
"eval_signal/advantage_pre_scale_std": 0.23238081485033035,
|
|
"eval_signal/advantage_std": 0.23238081485033035,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.23739226162433624,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.96875,
|
|
"eval_signal/brier_reward/group_std_mean": 0.28240957856178284,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02373922662809491,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02373922662809491,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0388031005859375,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.390625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04573572054505348,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038803101051598787,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038803101051598787,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003774499346036464,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.859375,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0055051157251000404,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7181244553939905e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7181244553939905e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3006228432059288,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4013464003801346,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3006228432059288,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4013464003801346,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3006228432059288,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4013464003801346,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3006228432059288,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4013464003801346,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3006228432059288,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4013464003801346,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3006228432059288,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.4013464003801346,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3006228432059288,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4013464003801346,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037577852490358055,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.07121825404465199,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8984375,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.10425052046775818,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007121825474314392,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007121825474314392,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.234,
|
|
"step": 50
|
|
},
|
|
{
|
|
"calibration/aurc": 0.41083644913153056,
|
|
"calibration/batch_distribution_entropy": 0.9935570906004912,
|
|
"calibration/batch_entropy_100bins": 0.9715693910526587,
|
|
"calibration/batch_entropy_10bins": 0.9935570906004912,
|
|
"calibration/batch_entropy_50bins": 0.9855893029338851,
|
|
"calibration/batch_uniqueness": 0.9587646484375,
|
|
"calibration/buffer_distribution_entropy": 0.9504008284813109,
|
|
"calibration/buffer_entropy_100bins": 0.8723124863154184,
|
|
"calibration/buffer_entropy_10bins": 0.9504008284813109,
|
|
"calibration/buffer_entropy_50bins": 0.9103123756016671,
|
|
"calibration/confidence_entropy": 0.4926616876541866,
|
|
"calibration/coverage@0%": 0.00078125,
|
|
"calibration/coverage@1%": 0.00078125,
|
|
"calibration/coverage@10%": 0.00078125,
|
|
"calibration/coverage@15%": 0.00078125,
|
|
"calibration/coverage@20%": 0.014453125,
|
|
"calibration/coverage@25%": 0.027734375,
|
|
"calibration/coverage@30%": 0.1140625,
|
|
"calibration/coverage@5%": 0.00078125,
|
|
"calibration/ece": 0.19613354282701326,
|
|
"calibration/mean_confidence": 0.5161718102828323,
|
|
"calibration/prompt_uniqueness": 0.89189453125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 873.2,
|
|
"completions/max_terminated_length": 439.2,
|
|
"completions/mean_length": 132.3017578125,
|
|
"completions/mean_terminated_length": 132.02688598632812,
|
|
"completions/min_length": 53.8,
|
|
"completions/min_terminated_length": 53.8,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.0015510269440710545,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 182260737.0,
|
|
"reward": 0.8931734323501587,
|
|
"reward_std": 0.12140908688306809,
|
|
"rewards/accuracy_reward": 0.46357421875,
|
|
"rewards/brier_reward": 0.7294286847114563,
|
|
"rewards/confidence_uniqueness_reward": 0.9577026724815368,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.003854288347065449,
|
|
"rewards/frontier_coverage_0": 0.09462544620037079,
|
|
"rewards/frontier_coverage_1": 0.09462544620037079,
|
|
"rewards/frontier_coverage_10": 0.09462544620037079,
|
|
"rewards/frontier_coverage_15": 0.09462544620037079,
|
|
"rewards/frontier_coverage_20": 0.09462544620037079,
|
|
"rewards/frontier_coverage_25": 0.09462544620037079,
|
|
"rewards/frontier_coverage_5": 0.09462544620037079,
|
|
"rewards/frontier_ece_reward": 0.013301673159003258,
|
|
"rewards/frontier_entropy_batch_reward": -0.1659554123878479,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.140850830078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.188671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.18363622725009918,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.490625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0704254150390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0704254150390625,
|
|
"signal/advantage_abs_mean": 0.09490404278039932,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09490404278039932,
|
|
"signal/advantage_pre_scale_std": 0.13685409128665924,
|
|
"signal/advantage_std": 0.13685409128665924,
|
|
"signal/brier_reward/centered_abs_mean": 0.20794688463211058,
|
|
"signal/brier_reward/group_bin_occupancy": 0.905078125,
|
|
"signal/brier_reward/group_std_mean": 0.25734142661094667,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020794688165187834,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020794688165187834,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011929828859865665,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016922668367624284,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011929828440770506,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011929828440770506,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002570530725643039,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.776953125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038315205834805965,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.213163508917205e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.213163508917205e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.25130972266197205,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.3175831615924835,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.25130972266197205,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3175831615924835,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.25130972266197205,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3175831615924835,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.25130972266197205,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3175831615924835,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.25130972266197205,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3175831615924835,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.25130972266197205,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3175831615924835,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.25130972266197205,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3175831615924835,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003141371626406908,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06303459852933883,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08597700744867325,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006303459964692592,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006303459964692592,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2539799213409424,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.76171875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33189951777458193,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02539799325168133,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02539799325168133,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34942118001298506,
|
|
"calibration/batch_distribution_entropy": 0.9799110967706934,
|
|
"calibration/batch_entropy_100bins": 0.9610832692207,
|
|
"calibration/batch_entropy_10bins": 0.9799110967706934,
|
|
"calibration/batch_entropy_50bins": 0.9736960127197387,
|
|
"calibration/batch_uniqueness": 0.9573197846138358,
|
|
"calibration/buffer_distribution_entropy": 0.9584469854664572,
|
|
"calibration/buffer_entropy_100bins": 0.8916949290759867,
|
|
"calibration/buffer_entropy_10bins": 0.9584469854664572,
|
|
"calibration/buffer_entropy_50bins": 0.9246897994577994,
|
|
"calibration/confidence_entropy": 0.46040453975152423,
|
|
"calibration/coverage@0%": 0.010939028864970645,
|
|
"calibration/coverage@1%": 0.010939028864970645,
|
|
"calibration/coverage@10%": 0.014454653864970645,
|
|
"calibration/coverage@15%": 0.01915132705479452,
|
|
"calibration/coverage@20%": 0.10548938967710372,
|
|
"calibration/coverage@25%": 0.20474865459882582,
|
|
"calibration/coverage@30%": 0.3540063906555773,
|
|
"calibration/coverage@5%": 0.010939028864970645,
|
|
"calibration/ece": 0.14488413962169583,
|
|
"calibration/mean_confidence": 0.4789294839135028,
|
|
"calibration/prompt_uniqueness": 0.8839207612513007,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 870.8,
|
|
"completions/max_terminated_length": 420.6,
|
|
"completions/mean_length": 143.64931640625,
|
|
"completions/mean_terminated_length": 143.24107360839844,
|
|
"completions/min_length": 60.6,
|
|
"completions/min_terminated_length": 60.6,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.0014950234908610582,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 198546522.0,
|
|
"reward": 0.9052001118659974,
|
|
"reward_std": 0.11826727986335754,
|
|
"rewards/accuracy_reward": 0.491796875,
|
|
"rewards/brier_reward": 0.7406868457794189,
|
|
"rewards/confidence_uniqueness_reward": 0.9558995842933655,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0034085330553352833,
|
|
"rewards/frontier_coverage_0": 0.09948756024241448,
|
|
"rewards/frontier_coverage_1": 0.09948756024241448,
|
|
"rewards/frontier_coverage_10": 0.09948756024241448,
|
|
"rewards/frontier_coverage_15": 0.09948756024241448,
|
|
"rewards/frontier_coverage_20": 0.09948756024241448,
|
|
"rewards/frontier_coverage_25": 0.09948756024241448,
|
|
"rewards/frontier_coverage_5": 0.09948756024241448,
|
|
"rewards/frontier_ece_reward": 0.01857722718268633,
|
|
"rewards/frontier_entropy_batch_reward": -0.20535460412502288,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13638916015625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18984375,
|
|
"signal/accuracy_reward/group_std_mean": 0.18098436594009398,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.48125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.068194580078125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.068194580078125,
|
|
"signal/advantage_abs_mean": 0.0910866379737854,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0910866379737854,
|
|
"signal/advantage_pre_scale_std": 0.1333424761891365,
|
|
"signal/advantage_std": 0.1333424761891365,
|
|
"signal/brier_reward/centered_abs_mean": 0.2078978717327118,
|
|
"signal/brier_reward/group_bin_occupancy": 0.876953125,
|
|
"signal/brier_reward/group_std_mean": 0.2592237114906311,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020789787545800208,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020789787545800208,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.015980724617838858,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.897265625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02260695695877075,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0015980724710971117,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015980724710971117,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_bin_occupancy": 0.127734375,
|
|
"signal/format_reward/group_std_mean": 0.003866990189999342,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002407692139968276,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.776171875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00357803120277822,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.009615102200769e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.009615102200769e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.262815922498703,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.908203125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.33183927536010743,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.262815922498703,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.908203125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.33183927536010743,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.262815922498703,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.908203125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.33183927536010743,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.262815922498703,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.908203125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.33183927536010743,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.262815922498703,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.908203125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.33183927536010743,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.262815922498703,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.908203125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.33183927536010743,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.262815922498703,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.908203125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.33183927536010743,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003285199077799916,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.057891517877578735,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.860546875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07930349558591843,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005789151694625616,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005789151694625616,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3005888402462006,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.749609375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.379769903421402,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.030058884248137473,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030058884248137473,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2947941744415761,
|
|
"calibration/batch_distribution_entropy": 0.9785599217065368,
|
|
"calibration/batch_entropy_100bins": 0.9581661507133953,
|
|
"calibration/batch_entropy_10bins": 0.9785599217065368,
|
|
"calibration/batch_entropy_50bins": 0.9720473639947895,
|
|
"calibration/batch_uniqueness": 0.9595977783203125,
|
|
"calibration/buffer_distribution_entropy": 0.963559167866169,
|
|
"calibration/buffer_entropy_100bins": 0.906266340287414,
|
|
"calibration/buffer_entropy_10bins": 0.963559167866169,
|
|
"calibration/buffer_entropy_50bins": 0.9352872608533884,
|
|
"calibration/confidence_entropy": 0.47836497278573387,
|
|
"calibration/coverage@0%": 0.012109375,
|
|
"calibration/coverage@1%": 0.012109375,
|
|
"calibration/coverage@10%": 0.0625,
|
|
"calibration/coverage@15%": 0.185546875,
|
|
"calibration/coverage@20%": 0.290234375,
|
|
"calibration/coverage@25%": 0.455078125,
|
|
"calibration/coverage@30%": 0.572265625,
|
|
"calibration/coverage@5%": 0.021484375,
|
|
"calibration/ece": 0.15920833551919994,
|
|
"calibration/mean_confidence": 0.5314152702200673,
|
|
"calibration/prompt_uniqueness": 0.88876953125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 461.8,
|
|
"completions/max_terminated_length": 461.8,
|
|
"completions/mean_length": 155.57880859375,
|
|
"completions/mean_terminated_length": 155.57880859375,
|
|
"completions/min_length": 65.8,
|
|
"completions/min_terminated_length": 65.8,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.0011343832593411207,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 215171873.0,
|
|
"reward": 0.9295921802520752,
|
|
"reward_std": 0.11272455304861069,
|
|
"rewards/accuracy_reward": 0.5361328125,
|
|
"rewards/brier_reward": 0.753898274898529,
|
|
"rewards/confidence_uniqueness_reward": 0.960012423992157,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.003036556579172611,
|
|
"rewards/frontier_coverage_0": 0.07245685756206513,
|
|
"rewards/frontier_coverage_1": 0.07245685756206513,
|
|
"rewards/frontier_coverage_10": 0.07245685756206513,
|
|
"rewards/frontier_coverage_15": 0.07245685756206513,
|
|
"rewards/frontier_coverage_20": 0.07245685756206513,
|
|
"rewards/frontier_coverage_25": 0.07245685756206513,
|
|
"rewards/frontier_coverage_5": 0.07245685756206513,
|
|
"rewards/frontier_ece_reward": 0.022372994944453238,
|
|
"rewards/frontier_entropy_batch_reward": -0.18355790972709657,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.13197021484375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.187890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.17548914551734923,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.065985107421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.065985107421875,
|
|
"signal/advantage_abs_mean": 0.08748974055051803,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08748974055051803,
|
|
"signal/advantage_pre_scale_std": 0.1294364556670189,
|
|
"signal/advantage_std": 0.1294364556670189,
|
|
"signal/brier_reward/centered_abs_mean": 0.1890464246273041,
|
|
"signal/brier_reward/group_bin_occupancy": 0.86875,
|
|
"signal/brier_reward/group_std_mean": 0.23756815493106842,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0189046423882246,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0189046423882246,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012290091067552567,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.928125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015977666527032853,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012290091253817081,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012290091253817081,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024446202907711266,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.78046875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003613197011873126,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.055775378015824e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.055775378015824e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.22794330716133118,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2930518627166748,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22794330716133118,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2930518627166748,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22794330716133118,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2930518627166748,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22794330716133118,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2930518627166748,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22794330716133118,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2930518627166748,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22794330716133118,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2930518627166748,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22794330716133118,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8890625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2930518627166748,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00284929140470922,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0545510284602642,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.84921875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.07529444098472596,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00545510295778513,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00545510295778513,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27159354090690613,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753515625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3504547536373138,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027159354835748672,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027159354835748672,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3189934672338976,
|
|
"calibration/batch_distribution_entropy": 0.9939897166524112,
|
|
"calibration/batch_entropy_100bins": 0.967330341286187,
|
|
"calibration/batch_entropy_10bins": 0.9939897166524112,
|
|
"calibration/batch_entropy_50bins": 0.9831548626039949,
|
|
"calibration/batch_uniqueness": 0.9600825121686686,
|
|
"calibration/buffer_distribution_entropy": 0.9681576729442384,
|
|
"calibration/buffer_entropy_100bins": 0.9180615027527184,
|
|
"calibration/buffer_entropy_10bins": 0.9681576729442384,
|
|
"calibration/buffer_entropy_50bins": 0.9437593307933965,
|
|
"calibration/confidence_entropy": 0.4933469731122976,
|
|
"calibration/coverage@0%": 0.00390625,
|
|
"calibration/coverage@1%": 0.00390625,
|
|
"calibration/coverage@10%": 0.09296875,
|
|
"calibration/coverage@15%": 0.2039675245098039,
|
|
"calibration/coverage@20%": 0.29543045343137253,
|
|
"calibration/coverage@25%": 0.4104151348039215,
|
|
"calibration/coverage@30%": 0.5085263480392157,
|
|
"calibration/coverage@5%": 0.00390625,
|
|
"calibration/ece": 0.15230069879851205,
|
|
"calibration/mean_confidence": 0.5016724122037817,
|
|
"calibration/prompt_uniqueness": 0.885172063334634,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1095.8,
|
|
"completions/max_terminated_length": 681.2,
|
|
"completions/mean_length": 163.984765625,
|
|
"completions/mean_terminated_length": 163.58394470214844,
|
|
"completions/min_length": 66.8,
|
|
"completions/min_terminated_length": 66.8,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0011190164368599653,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 232004261.0,
|
|
"reward": 0.9104782462120056,
|
|
"reward_std": 0.10773791372776031,
|
|
"rewards/accuracy_reward": 0.48671875,
|
|
"rewards/brier_reward": 0.7708696365356446,
|
|
"rewards/confidence_uniqueness_reward": 0.9604137420654297,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.003139969985932112,
|
|
"rewards/frontier_coverage_0": 0.11844078451395035,
|
|
"rewards/frontier_coverage_1": 0.11844078451395035,
|
|
"rewards/frontier_coverage_10": 0.11844078451395035,
|
|
"rewards/frontier_coverage_15": 0.11844078451395035,
|
|
"rewards/frontier_coverage_20": 0.11844078451395035,
|
|
"rewards/frontier_coverage_25": 0.11844078451395035,
|
|
"rewards/frontier_coverage_5": 0.11844078451395035,
|
|
"rewards/frontier_ece_reward": 0.021617041900753975,
|
|
"rewards/frontier_entropy_batch_reward": -0.18300187289714814,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1204345703125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.182421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.15962167084217072,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06021728515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06021728515625,
|
|
"signal/advantage_abs_mean": 0.08397592157125473,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08397592157125473,
|
|
"signal/advantage_pre_scale_std": 0.1261191889643669,
|
|
"signal/advantage_std": 0.1261191889643669,
|
|
"signal/brier_reward/centered_abs_mean": 0.17388453483581542,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8765625,
|
|
"signal/brier_reward/group_std_mean": 0.219650474190712,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01738845370709896,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01738845370709896,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012389418855309486,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9328125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01659379303455353,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012389418901875616,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012389418901875616,
|
|
"signal/format_reward/centered_abs_mean": 0.00074462890625,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0018734002020210027,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023782884702086447,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003504908038303256,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.972860493173357e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.972860493173357e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21117229461669923,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.27140182852745054,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21117229461669923,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27140182852745054,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21117229461669923,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27140182852745054,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21117229461669923,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27140182852745054,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21117229461669923,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27140182852745054,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21117229461669923,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27140182852745054,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21117229461669923,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.891015625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27140182852745054,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026396537199616433,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04839524030685425,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.842578125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06754831522703171,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004839524254202842,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004839524254202842,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2688676655292511,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34602165818214414,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026886767894029617,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026886767894029617,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3722649127966441,
|
|
"calibration/batch_distribution_entropy": 0.9747995210345157,
|
|
"calibration/batch_entropy_100bins": 0.9571055886452392,
|
|
"calibration/batch_entropy_10bins": 0.9747995210345157,
|
|
"calibration/batch_entropy_50bins": 0.9714064992183363,
|
|
"calibration/batch_uniqueness": 0.9584098953076943,
|
|
"calibration/buffer_distribution_entropy": 0.9721121659408224,
|
|
"calibration/buffer_entropy_100bins": 0.9276944237884651,
|
|
"calibration/buffer_entropy_10bins": 0.9721121659408224,
|
|
"calibration/buffer_entropy_50bins": 0.9509444674613132,
|
|
"calibration/confidence_entropy": 0.4904281243945256,
|
|
"calibration/coverage@0%": 0.000390625,
|
|
"calibration/coverage@1%": 0.000390625,
|
|
"calibration/coverage@10%": 0.078125,
|
|
"calibration/coverage@15%": 0.144921875,
|
|
"calibration/coverage@20%": 0.20859375,
|
|
"calibration/coverage@25%": 0.257421875,
|
|
"calibration/coverage@30%": 0.278515625,
|
|
"calibration/coverage@5%": 0.036328125,
|
|
"calibration/ece": 0.17131989400604017,
|
|
"calibration/mean_confidence": 0.5100639151774843,
|
|
"calibration/prompt_uniqueness": 0.8860584270941727,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1245.6,
|
|
"completions/max_terminated_length": 700.6,
|
|
"completions/mean_length": 166.92529296875,
|
|
"completions/mean_terminated_length": 166.2565490722656,
|
|
"completions/min_length": 68.4,
|
|
"completions/min_terminated_length": 68.4,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0012799223186448216,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0017,
|
|
"num_tokens": 248965256.0,
|
|
"reward": 0.9279002666473388,
|
|
"reward_std": 0.11562621295452118,
|
|
"rewards/accuracy_reward": 0.54033203125,
|
|
"rewards/brier_reward": 0.752100133895874,
|
|
"rewards/confidence_uniqueness_reward": 0.9600031733512878,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.0029632408171892167,
|
|
"rewards/frontier_coverage_0": 0.06165201477706432,
|
|
"rewards/frontier_coverage_1": 0.06165201477706432,
|
|
"rewards/frontier_coverage_10": 0.06165201477706432,
|
|
"rewards/frontier_coverage_15": 0.06165201477706432,
|
|
"rewards/frontier_coverage_20": 0.06165201477706432,
|
|
"rewards/frontier_coverage_25": 0.06165201477706432,
|
|
"rewards/frontier_coverage_5": 0.06165201477706432,
|
|
"rewards/frontier_ece_reward": 0.018022438511252403,
|
|
"rewards/frontier_entropy_batch_reward": -0.2034287005662918,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.137420654296875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1800607681274414,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.49375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0687103271484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0687103271484375,
|
|
"signal/advantage_abs_mean": 0.0900656446814537,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0900656446814537,
|
|
"signal/advantage_pre_scale_std": 0.13366247713565826,
|
|
"signal/advantage_std": 0.13366247713565826,
|
|
"signal/brier_reward/centered_abs_mean": 0.17995524406433105,
|
|
"signal/brier_reward/group_bin_occupancy": 0.87421875,
|
|
"signal/brier_reward/group_std_mean": 0.2263825535774231,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017995523661375044,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017995523661375044,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012892700731754303,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.92578125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018097008019685744,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012892701663076878,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012892701663076878,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629800856113,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002403355622664094,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.784375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035244593862444164,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.0041945865377784e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.0041945865377784e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21193841695785523,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2735629081726074,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21193841695785523,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2735629081726074,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21193841695785523,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2735629081726074,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21193841695785523,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2735629081726074,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21193841695785523,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2735629081726074,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21193841695785523,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2735629081726074,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21193841695785523,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2735629081726074,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026492302305996416,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04577092379331589,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.835546875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06537232622504234,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004577092453837394,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004577092453837394,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28712775707244875,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3657856583595276,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02871277555823326,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02871277555823326,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2756884567471795,
|
|
"calibration/batch_distribution_entropy": 0.981849015365291,
|
|
"calibration/batch_entropy_100bins": 0.963794621717061,
|
|
"calibration/batch_entropy_10bins": 0.981849015365291,
|
|
"calibration/batch_entropy_50bins": 0.9772544795865941,
|
|
"calibration/batch_uniqueness": 0.9583403270974067,
|
|
"calibration/buffer_distribution_entropy": 0.9748055844875566,
|
|
"calibration/buffer_entropy_100bins": 0.9354970264003273,
|
|
"calibration/buffer_entropy_10bins": 0.9748055844875566,
|
|
"calibration/buffer_entropy_50bins": 0.956262724283142,
|
|
"calibration/confidence_entropy": 0.47419515340800844,
|
|
"calibration/coverage@0%": 0.019922639432485324,
|
|
"calibration/coverage@1%": 0.019922639432485324,
|
|
"calibration/coverage@10%": 0.1671882644324853,
|
|
"calibration/coverage@15%": 0.29453660102739726,
|
|
"calibration/coverage@20%": 0.3945366010273973,
|
|
"calibration/coverage@25%": 0.46133347602739727,
|
|
"calibration/coverage@30%": 0.6404713490704501,
|
|
"calibration/coverage@5%": 0.08242263943248532,
|
|
"calibration/ece": 0.1516829189459081,
|
|
"calibration/mean_confidence": 0.4916690845191692,
|
|
"calibration/prompt_uniqueness": 0.8781654445239333,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 878.2,
|
|
"completions/max_terminated_length": 482.8,
|
|
"completions/mean_length": 163.8306640625,
|
|
"completions/mean_terminated_length": 163.4287567138672,
|
|
"completions/min_length": 66.0,
|
|
"completions/min_terminated_length": 66.0,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.001148949726484716,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 265697698.0,
|
|
"reward": 0.9235002279281617,
|
|
"reward_std": 0.1053592398762703,
|
|
"rewards/accuracy_reward": 0.5205078125,
|
|
"rewards/brier_reward": 0.7646861553192139,
|
|
"rewards/confidence_uniqueness_reward": 0.9591035127639771,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0028343759011477234,
|
|
"rewards/frontier_coverage_0": 0.09306152537465096,
|
|
"rewards/frontier_coverage_1": 0.09306152537465096,
|
|
"rewards/frontier_coverage_10": 0.09306152537465096,
|
|
"rewards/frontier_coverage_15": 0.09306152537465096,
|
|
"rewards/frontier_coverage_20": 0.09306152537465096,
|
|
"rewards/frontier_coverage_25": 0.09306152537465096,
|
|
"rewards/frontier_coverage_5": 0.09306152537465096,
|
|
"rewards/frontier_ece_reward": 0.018909335136413574,
|
|
"rewards/frontier_entropy_batch_reward": -0.18935712277889252,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12547607421875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.180078125,
|
|
"signal/accuracy_reward/group_std_mean": 0.16060097515583038,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062738037109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062738037109375,
|
|
"signal/advantage_abs_mean": 0.08297341018915176,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08297341018915176,
|
|
"signal/advantage_pre_scale_std": 0.12462374716997146,
|
|
"signal/advantage_std": 0.12462374716997146,
|
|
"signal/brier_reward/centered_abs_mean": 0.1717162013053894,
|
|
"signal/brier_reward/group_bin_occupancy": 0.865625,
|
|
"signal/brier_reward/group_std_mean": 0.21583383977413179,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017171620205044747,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017171620205044747,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012881658598780632,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.929296875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017492034845054148,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012881658738479018,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012881658738479018,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022053365129977463,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.780078125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00320956208743155,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7566706557990982e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7566706557990982e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21591795980930328,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2742127299308777,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21591795980930328,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2742127299308777,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21591795980930328,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2742127299308777,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21591795980930328,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2742127299308777,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21591795980930328,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2742127299308777,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21591795980930328,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2742127299308777,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21591795980930328,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.890234375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2742127299308777,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026989746373146774,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04098983183503151,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.82890625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05865926668047905,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004098983202129603,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004098983202129603,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27483277320861815,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35315130949020385,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027483277022838593,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027483277022838593,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3897235445216074,
|
|
"calibration/batch_distribution_entropy": 0.9871985803996584,
|
|
"calibration/batch_entropy_100bins": 0.9650498108188612,
|
|
"calibration/batch_entropy_10bins": 0.9871985803996584,
|
|
"calibration/batch_entropy_50bins": 0.9774071673496001,
|
|
"calibration/batch_uniqueness": 0.9601234608508132,
|
|
"calibration/buffer_distribution_entropy": 0.9776273835298188,
|
|
"calibration/buffer_entropy_100bins": 0.9421174103566827,
|
|
"calibration/buffer_entropy_10bins": 0.9776273835298188,
|
|
"calibration/buffer_entropy_50bins": 0.9610857397243047,
|
|
"calibration/confidence_entropy": 0.4835290106060966,
|
|
"calibration/coverage@0%": 0.0035194471624266145,
|
|
"calibration/coverage@1%": 0.0035194471624266145,
|
|
"calibration/coverage@10%": 0.05117569716242661,
|
|
"calibration/coverage@15%": 0.1171913221624266,
|
|
"calibration/coverage@20%": 0.15980461105675148,
|
|
"calibration/coverage@25%": 0.2211900684931507,
|
|
"calibration/coverage@30%": 0.3173709637964775,
|
|
"calibration/coverage@5%": 0.0035194471624266145,
|
|
"calibration/ece": 0.14380001971983383,
|
|
"calibration/mean_confidence": 0.5027473010005465,
|
|
"calibration/prompt_uniqueness": 0.880182234082336,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1118.0,
|
|
"completions/max_terminated_length": 649.4,
|
|
"completions/mean_length": 167.30830078125,
|
|
"completions/mean_terminated_length": 166.77369995117186,
|
|
"completions/min_length": 64.8,
|
|
"completions/min_terminated_length": 64.8,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0011026699794456363,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 282376631.0,
|
|
"reward": 0.9143985748291016,
|
|
"reward_std": 0.10913633704185485,
|
|
"rewards/accuracy_reward": 0.503125,
|
|
"rewards/brier_reward": 0.7558487296104431,
|
|
"rewards/confidence_uniqueness_reward": 0.958933699131012,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.003067450597882271,
|
|
"rewards/frontier_coverage_0": 0.09763064086437226,
|
|
"rewards/frontier_coverage_1": 0.09763064086437226,
|
|
"rewards/frontier_coverage_10": 0.09763064086437226,
|
|
"rewards/frontier_coverage_15": 0.09763064086437226,
|
|
"rewards/frontier_coverage_20": 0.09763064086437226,
|
|
"rewards/frontier_coverage_25": 0.09763064086437226,
|
|
"rewards/frontier_coverage_5": 0.09763064086437226,
|
|
"rewards/frontier_ece_reward": 0.016011307016015053,
|
|
"rewards/frontier_entropy_batch_reward": -0.18454676866531372,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1283447265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1672771155834198,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06417236328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06417236328125,
|
|
"signal/advantage_abs_mean": 0.08471592962741852,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08471592962741852,
|
|
"signal/advantage_pre_scale_std": 0.12727494090795516,
|
|
"signal/advantage_std": 0.12727494090795516,
|
|
"signal/brier_reward/centered_abs_mean": 0.1756508618593216,
|
|
"signal/brier_reward/group_bin_occupancy": 0.872265625,
|
|
"signal/brier_reward/group_std_mean": 0.2209733545780182,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017565086483955383,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.017565086483955383,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013165917806327343,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018373236805200577,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013165918411687017,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013165918411687017,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002310941834002733,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.778125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034583484288305045,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8886771542602218e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8886771542602218e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.21623624563217164,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2773744761943817,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21623624563217164,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2773744761943817,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21623624563217164,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2773744761943817,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21623624563217164,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2773744761943817,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21623624563217164,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2773744761943817,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21623624563217164,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2773744761943817,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21623624563217164,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2773744761943817,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002702953014522791,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04007608145475387,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.824609375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05737483724951744,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004007608164101839,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004007608164101839,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26959097683429717,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748828125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3461661696434021,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026959098130464553,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026959098130464553,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33929897089230937,
|
|
"calibration/batch_distribution_entropy": 0.989794172784116,
|
|
"calibration/batch_entropy_100bins": 0.9616161331101599,
|
|
"calibration/batch_entropy_10bins": 0.989794172784116,
|
|
"calibration/batch_entropy_50bins": 0.9782942991565935,
|
|
"calibration/batch_uniqueness": 0.9621734619140625,
|
|
"calibration/buffer_distribution_entropy": 0.9798353797281024,
|
|
"calibration/buffer_entropy_100bins": 0.9472962073477508,
|
|
"calibration/buffer_entropy_10bins": 0.9798353797281024,
|
|
"calibration/buffer_entropy_50bins": 0.9648110714996019,
|
|
"calibration/confidence_entropy": 0.5029849704441383,
|
|
"calibration/coverage@0%": 0.016796875,
|
|
"calibration/coverage@1%": 0.016796875,
|
|
"calibration/coverage@10%": 0.06796875,
|
|
"calibration/coverage@15%": 0.10859375,
|
|
"calibration/coverage@20%": 0.158203125,
|
|
"calibration/coverage@25%": 0.209375,
|
|
"calibration/coverage@30%": 0.330078125,
|
|
"calibration/coverage@5%": 0.026171875,
|
|
"calibration/ece": 0.13708101797225564,
|
|
"calibration/mean_confidence": 0.5260060638175437,
|
|
"calibration/prompt_uniqueness": 0.8890625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 990.4,
|
|
"completions/max_terminated_length": 636.2,
|
|
"completions/mean_length": 157.38349609375,
|
|
"completions/mean_terminated_length": 157.11478271484376,
|
|
"completions/min_length": 65.2,
|
|
"completions/min_terminated_length": 65.2,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0010967063717544079,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 298946414.0,
|
|
"reward": 0.9226956605911255,
|
|
"reward_std": 0.10692842602729798,
|
|
"rewards/accuracy_reward": 0.5185546875,
|
|
"rewards/brier_reward": 0.759922206401825,
|
|
"rewards/confidence_uniqueness_reward": 0.9619071364402771,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0029738360550254582,
|
|
"rewards/frontier_coverage_0": 0.08782123178243637,
|
|
"rewards/frontier_coverage_1": 0.08782123178243637,
|
|
"rewards/frontier_coverage_10": 0.08782123178243637,
|
|
"rewards/frontier_coverage_15": 0.08782123178243637,
|
|
"rewards/frontier_coverage_20": 0.08782123178243637,
|
|
"rewards/frontier_coverage_25": 0.08782123178243637,
|
|
"rewards/frontier_coverage_5": 0.08782123178243637,
|
|
"rewards/frontier_ece_reward": 0.015628389501944184,
|
|
"rewards/frontier_entropy_batch_reward": -0.17876963317394257,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.12471923828125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.16823607087135314,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.50625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062359619140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.062359619140625,
|
|
"signal/advantage_abs_mean": 0.08173956871032714,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08173956871032714,
|
|
"signal/advantage_pre_scale_std": 0.12362392991781235,
|
|
"signal/advantage_std": 0.12362392991781235,
|
|
"signal/brier_reward/centered_abs_mean": 0.16985757648944855,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8671875,
|
|
"signal/brier_reward/group_std_mean": 0.21388141214847564,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016985757648944853,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016985757648944853,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011985784396529198,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.937890625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015646530874073507,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011985784396529198,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011985784396529198,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022517605219036342,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.753125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00341446828097105,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8147007833467795e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8147007833467795e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20966576039791107,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2668231546878815,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20966576039791107,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2668231546878815,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20966576039791107,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2668231546878815,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20966576039791107,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2668231546878815,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20966576039791107,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2668231546878815,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20966576039791107,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2668231546878815,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20966576039791107,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.886328125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2668231546878815,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002620822051540017,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03799701854586601,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.808203125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05444479286670685,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003799702040851116,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003799702040851116,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26266041994094846,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3405035316944122,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02626604326069355,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02626604326069355,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31348549126500747,
|
|
"calibration/batch_distribution_entropy": 0.984274980481597,
|
|
"calibration/batch_entropy_100bins": 0.9631625065111967,
|
|
"calibration/batch_entropy_10bins": 0.984274980481597,
|
|
"calibration/batch_entropy_50bins": 0.9770299989819294,
|
|
"calibration/batch_uniqueness": 0.9626984046368829,
|
|
"calibration/buffer_distribution_entropy": 0.9815639661047172,
|
|
"calibration/buffer_entropy_100bins": 0.9515858500656005,
|
|
"calibration/buffer_entropy_10bins": 0.9815639661047172,
|
|
"calibration/buffer_entropy_50bins": 0.967851841375308,
|
|
"calibration/confidence_entropy": 0.5032728046019687,
|
|
"calibration/coverage@0%": 0.00546875,
|
|
"calibration/coverage@1%": 0.00546875,
|
|
"calibration/coverage@10%": 0.014453125,
|
|
"calibration/coverage@15%": 0.11645899584148726,
|
|
"calibration/coverage@20%": 0.2305727128180039,
|
|
"calibration/coverage@25%": 0.3513194104696673,
|
|
"calibration/coverage@30%": 0.4826749021526419,
|
|
"calibration/coverage@5%": 0.009765625,
|
|
"calibration/ece": 0.12730379469726888,
|
|
"calibration/mean_confidence": 0.5350013606511557,
|
|
"calibration/prompt_uniqueness": 0.8859659534339229,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 721.4,
|
|
"completions/max_terminated_length": 502.4,
|
|
"completions/mean_length": 158.0119140625,
|
|
"completions/mean_terminated_length": 157.87767639160157,
|
|
"completions/min_length": 63.0,
|
|
"completions/min_terminated_length": 63.0,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0013374168192967772,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 315494408.0,
|
|
"reward": 0.920866048336029,
|
|
"reward_std": 0.10256336778402328,
|
|
"rewards/accuracy_reward": 0.51435546875,
|
|
"rewards/brier_reward": 0.7553410172462464,
|
|
"rewards/confidence_uniqueness_reward": 0.9617467761039734,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.003019801015034318,
|
|
"rewards/frontier_coverage_0": 0.08569234870374202,
|
|
"rewards/frontier_coverage_1": 0.08569234870374202,
|
|
"rewards/frontier_coverage_10": 0.08569234870374202,
|
|
"rewards/frontier_coverage_15": 0.08569234870374202,
|
|
"rewards/frontier_coverage_20": 0.08569234870374202,
|
|
"rewards/frontier_coverage_25": 0.08569234870374202,
|
|
"rewards/frontier_coverage_5": 0.08569234870374202,
|
|
"rewards/frontier_ece_reward": 0.014303101412951946,
|
|
"rewards/frontier_entropy_batch_reward": -0.16813477575778962,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.118218994140625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.181640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15723580718040467,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0591094970703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0591094970703125,
|
|
"signal/advantage_abs_mean": 0.07947536259889602,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07947536259889602,
|
|
"signal/advantage_pre_scale_std": 0.1194717451930046,
|
|
"signal/advantage_std": 0.1194717451930046,
|
|
"signal/brier_reward/centered_abs_mean": 0.16404346823692323,
|
|
"signal/brier_reward/group_bin_occupancy": 0.878515625,
|
|
"signal/brier_reward/group_std_mean": 0.20719643235206603,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016404346562922,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016404346562922,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012253463082015515,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93359375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01594906710088253,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001225346396677196,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001225346396677196,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021592382341623305,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.76875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032209414057433605,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.699047727219295e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.699047727219295e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2054966926574707,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2626974046230316,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2054966926574707,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2626974046230316,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2054966926574707,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2626974046230316,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2054966926574707,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2626974046230316,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2054966926574707,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2626974046230316,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2054966926574707,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2626974046230316,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2054966926574707,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2626974046230316,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025687087327241898,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03668390363454819,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.805078125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05322126373648643,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003668390540406108,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003668390540406108,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25058538317680357,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3319805324077606,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025058538839221,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025058538839221,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2693574848107298,
|
|
"calibration/batch_distribution_entropy": 0.9883345901493682,
|
|
"calibration/batch_entropy_100bins": 0.964873857679726,
|
|
"calibration/batch_entropy_10bins": 0.9883345901493682,
|
|
"calibration/batch_entropy_50bins": 0.9762615204324228,
|
|
"calibration/batch_uniqueness": 0.962933349609375,
|
|
"calibration/buffer_distribution_entropy": 0.9829313733544339,
|
|
"calibration/buffer_entropy_100bins": 0.9553256663761157,
|
|
"calibration/buffer_entropy_10bins": 0.9829313733544339,
|
|
"calibration/buffer_entropy_50bins": 0.9704623223411284,
|
|
"calibration/confidence_entropy": 0.5033581533240031,
|
|
"calibration/coverage@0%": 0.008984375,
|
|
"calibration/coverage@1%": 0.008984375,
|
|
"calibration/coverage@10%": 0.198046875,
|
|
"calibration/coverage@15%": 0.321875,
|
|
"calibration/coverage@20%": 0.42265625,
|
|
"calibration/coverage@25%": 0.519140625,
|
|
"calibration/coverage@30%": 0.587890625,
|
|
"calibration/coverage@5%": 0.051171875,
|
|
"calibration/ece": 0.1460491076202734,
|
|
"calibration/mean_confidence": 0.536501651706636,
|
|
"calibration/prompt_uniqueness": 0.88388671875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 505.2,
|
|
"completions/max_terminated_length": 505.2,
|
|
"completions/mean_length": 159.97177734375,
|
|
"completions/mean_terminated_length": 159.97177734375,
|
|
"completions/min_length": 73.0,
|
|
"completions/min_terminated_length": 73.0,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.001107752905227244,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 332221223.0,
|
|
"reward": 0.9314111828804016,
|
|
"reward_std": 0.09003743529319763,
|
|
"rewards/accuracy_reward": 0.53388671875,
|
|
"rewards/brier_reward": 0.7707650423049927,
|
|
"rewards/confidence_uniqueness_reward": 0.9618469119071961,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002678099344484508,
|
|
"rewards/frontier_coverage_0": 0.08919677138328552,
|
|
"rewards/frontier_coverage_1": 0.08919677138328552,
|
|
"rewards/frontier_coverage_10": 0.08919677138328552,
|
|
"rewards/frontier_coverage_15": 0.08919677138328552,
|
|
"rewards/frontier_coverage_20": 0.08919677138328552,
|
|
"rewards/frontier_coverage_25": 0.08919677138328552,
|
|
"rewards/frontier_coverage_5": 0.08919677138328552,
|
|
"rewards/frontier_ece_reward": 0.017109639570116998,
|
|
"rewards/frontier_entropy_batch_reward": -0.18177941143512727,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.088275146484375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12341197431087494,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441375732421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0441375732421875,
|
|
"signal/advantage_abs_mean": 0.06831415593624116,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06831415593624116,
|
|
"signal/advantage_pre_scale_std": 0.10720582604408264,
|
|
"signal/advantage_std": 0.10720582604408264,
|
|
"signal/brier_reward/centered_abs_mean": 0.1551128536462784,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8765625,
|
|
"signal/brier_reward/group_std_mean": 0.19683083295822143,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01551128625869751,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01551128625869751,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011729908920824528,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.930859375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015355130471289159,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011729909107089042,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011729909107089042,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002058024751022458,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.765234375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031040641479194164,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5725309751578606e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5725309751578606e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18719760775566102,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2401178687810898,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18719760775566102,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2401178687810898,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18719760775566102,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2401178687810898,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18719760775566102,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2401178687810898,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18719760775566102,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2401178687810898,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18719760775566102,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2401178687810898,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18719760775566102,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2401178687810898,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023399701341986654,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.036752212792634964,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.809375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05313318446278572,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003675221325829625,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003675221325829625,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2627487242221832,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.753125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3402763903141022,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026274873316287993,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026274873316287993,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.5221447170861099,
|
|
"eval_calibration/batch_distribution_entropy": 0.9459823530950395,
|
|
"eval_calibration/batch_entropy_100bins": 0.7099314297963414,
|
|
"eval_calibration/batch_entropy_10bins": 0.9459823530950395,
|
|
"eval_calibration/batch_entropy_50bins": 0.80145287432243,
|
|
"eval_calibration/batch_uniqueness": 0.9072265625,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9836311818150062,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9572832371945064,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9836311818150062,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9718177911308372,
|
|
"eval_calibration/confidence_entropy": 0.48188092462291277,
|
|
"eval_calibration/coverage@0%": 0.0234375,
|
|
"eval_calibration/coverage@1%": 0.0234375,
|
|
"eval_calibration/coverage@10%": 0.0234375,
|
|
"eval_calibration/coverage@15%": 0.0234375,
|
|
"eval_calibration/coverage@20%": 0.0234375,
|
|
"eval_calibration/coverage@25%": 0.0234375,
|
|
"eval_calibration/coverage@30%": 0.078125,
|
|
"eval_calibration/coverage@5%": 0.0234375,
|
|
"eval_calibration/ece": 0.21960188052234064,
|
|
"eval_calibration/mean_confidence": 0.4858527777314262,
|
|
"eval_calibration/prompt_uniqueness": 0.9072265625,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 343.5,
|
|
"eval_completions/max_terminated_length": 343.5,
|
|
"eval_completions/mean_length": 161.39608001708984,
|
|
"eval_completions/mean_terminated_length": 161.39608001708984,
|
|
"eval_completions/min_length": 88.0,
|
|
"eval_completions/min_terminated_length": 88.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 332221223.0,
|
|
"eval_reward": 0.7855877131223679,
|
|
"eval_reward_std": 0.2253391109406948,
|
|
"eval_rewards/accuracy_reward": 0.40234375,
|
|
"eval_rewards/brier_reward": 0.767953634262085,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.904541015625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.003360171685926616,
|
|
"eval_rewards/frontier_coverage_0": 0.17842230759561062,
|
|
"eval_rewards/frontier_coverage_1": 0.17842230759561062,
|
|
"eval_rewards/frontier_coverage_10": 0.17842230759561062,
|
|
"eval_rewards/frontier_coverage_15": 0.17842230759561062,
|
|
"eval_rewards/frontier_coverage_20": 0.17842230759561062,
|
|
"eval_rewards/frontier_coverage_25": 0.17842230759561062,
|
|
"eval_rewards/frontier_coverage_5": 0.17842230759561062,
|
|
"eval_rewards/frontier_ece_reward": 0.015964159043505788,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 18.266,
|
|
"eval_samples_per_second": 27.373,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.46875,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.491495244204998,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.234375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.234375,
|
|
"eval_signal/advantage_abs_mean": 0.20828185975551605,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20828185975551605,
|
|
"eval_signal/advantage_pre_scale_std": 0.2229425571858883,
|
|
"eval_signal/advantage_std": 0.2229425571858883,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21178840100765228,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
|
|
"eval_signal/brier_reward/group_std_mean": 0.26099943369627,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021178840193897486,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.021178840193897486,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037750244140625,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3671875,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04435160104185343,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037750244955532253,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037750244955532253,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00327087048208341,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8046875,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0050255340756848454,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.088588320882991e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.088588320882991e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.36353210359811783,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4482342004776001,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36353210359811783,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4482342004776001,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.36353210359811783,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4482342004776001,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.36353210359811783,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4482342004776001,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.36353210359811783,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4482342004776001,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.36353210359811783,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.4482342004776001,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36353210359811783,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4482342004776001,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004544151364825666,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.037730203941464424,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.765625,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.061389719136059284,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0037730205804109573,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0037730205804109573,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.219,
|
|
"step": 100
|
|
},
|
|
{
|
|
"calibration/aurc": 0.30565225146740005,
|
|
"calibration/batch_distribution_entropy": 0.9817950038846648,
|
|
"calibration/batch_entropy_100bins": 0.9574620453276896,
|
|
"calibration/batch_entropy_10bins": 0.9817950038846648,
|
|
"calibration/batch_entropy_50bins": 0.9759610198636606,
|
|
"calibration/batch_uniqueness": 0.9610809326171875,
|
|
"calibration/buffer_distribution_entropy": 0.9858778348612034,
|
|
"calibration/buffer_entropy_100bins": 0.9610770923601644,
|
|
"calibration/buffer_entropy_10bins": 0.9858778348612034,
|
|
"calibration/buffer_entropy_50bins": 0.9746952731382882,
|
|
"calibration/confidence_entropy": 0.4912567329274273,
|
|
"calibration/coverage@0%": 0.001953125,
|
|
"calibration/coverage@1%": 0.001953125,
|
|
"calibration/coverage@10%": 0.001953125,
|
|
"calibration/coverage@15%": 0.06328125,
|
|
"calibration/coverage@20%": 0.15,
|
|
"calibration/coverage@25%": 0.326171875,
|
|
"calibration/coverage@30%": 0.60625,
|
|
"calibration/coverage@5%": 0.001953125,
|
|
"calibration/ece": 0.13340430237380113,
|
|
"calibration/mean_confidence": 0.4750956146068159,
|
|
"calibration/prompt_uniqueness": 0.87880859375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 502.0,
|
|
"completions/max_terminated_length": 502.0,
|
|
"completions/mean_length": 161.48173828125,
|
|
"completions/mean_terminated_length": 161.48173828125,
|
|
"completions/min_length": 64.4,
|
|
"completions/min_terminated_length": 64.4,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0009220660431310534,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 348597228.0,
|
|
"reward": 0.9312142729759216,
|
|
"reward_std": 0.09383742660284042,
|
|
"rewards/accuracy_reward": 0.5375,
|
|
"rewards/brier_reward": 0.7658124089241027,
|
|
"rewards/confidence_uniqueness_reward": 0.9620379209518433,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002564789913594723,
|
|
"rewards/frontier_coverage_0": 0.086562230437994,
|
|
"rewards/frontier_coverage_1": 0.086562230437994,
|
|
"rewards/frontier_coverage_10": 0.086562230437994,
|
|
"rewards/frontier_coverage_15": 0.086562230437994,
|
|
"rewards/frontier_coverage_20": 0.086562230437994,
|
|
"rewards/frontier_coverage_25": 0.086562230437994,
|
|
"rewards/frontier_coverage_5": 0.086562230437994,
|
|
"rewards/frontier_ece_reward": 0.016775081306695937,
|
|
"rewards/frontier_entropy_batch_reward": -0.19491543173789977,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10306396484375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13901238441467284,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051531982421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051531982421875,
|
|
"signal/advantage_abs_mean": 0.07275230437517166,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07275230437517166,
|
|
"signal/advantage_pre_scale_std": 0.11130416691303253,
|
|
"signal/advantage_std": 0.11130416691303253,
|
|
"signal/brier_reward/centered_abs_mean": 0.15814976394176483,
|
|
"signal/brier_reward/group_bin_occupancy": 0.862890625,
|
|
"signal/brier_reward/group_std_mean": 0.20069519579410552,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01581497713923454,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01581497713923454,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013001594133675099,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91015625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01670000497251749,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013001594459638,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013001594459638,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019870033720508218,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7640625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030117711983621122,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.4837543605826795e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.4837543605826795e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.2016854852437973,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2578335404396057,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2016854852437973,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2578335404396057,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2016854852437973,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2578335404396057,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2016854852437973,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2578335404396057,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2016854852437973,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2578335404396057,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2016854852437973,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2578335404396057,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2016854852437973,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.889453125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2578335404396057,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025210686959326266,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03453442975878716,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.797265625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05071103274822235,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003453443106263876,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003453443106263876,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2709068328142166,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74609375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34899981021881105,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027090684697031974,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027090684697031974,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3341179136812372,
|
|
"calibration/batch_distribution_entropy": 0.9588299480996344,
|
|
"calibration/batch_entropy_100bins": 0.9429023940351428,
|
|
"calibration/batch_entropy_10bins": 0.9588299480996344,
|
|
"calibration/batch_entropy_50bins": 0.9597674673427136,
|
|
"calibration/batch_uniqueness": 0.9558685302734375,
|
|
"calibration/buffer_distribution_entropy": 0.9919930074210095,
|
|
"calibration/buffer_entropy_100bins": 0.9728636582987387,
|
|
"calibration/buffer_entropy_10bins": 0.9919930074210095,
|
|
"calibration/buffer_entropy_50bins": 0.9833901381550991,
|
|
"calibration/confidence_entropy": 0.4574315079144936,
|
|
"calibration/coverage@0%": 0.01953125,
|
|
"calibration/coverage@1%": 0.01953125,
|
|
"calibration/coverage@10%": 0.1015625,
|
|
"calibration/coverage@15%": 0.225390625,
|
|
"calibration/coverage@20%": 0.333203125,
|
|
"calibration/coverage@25%": 0.41015625,
|
|
"calibration/coverage@30%": 0.48515625,
|
|
"calibration/coverage@5%": 0.0265625,
|
|
"calibration/ece": 0.13175204310416827,
|
|
"calibration/mean_confidence": 0.44462494666585883,
|
|
"calibration/prompt_uniqueness": 0.865966796875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 614.4,
|
|
"completions/max_terminated_length": 614.4,
|
|
"completions/mean_length": 162.67568359375,
|
|
"completions/mean_terminated_length": 162.67568359375,
|
|
"completions/min_length": 72.6,
|
|
"completions/min_terminated_length": 72.6,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.0011161722941324115,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 365523443.0,
|
|
"reward": 0.9019862651824951,
|
|
"reward_std": 0.09622626602649689,
|
|
"rewards/accuracy_reward": 0.47529296875,
|
|
"rewards/brier_reward": 0.7733848929405213,
|
|
"rewards/confidence_uniqueness_reward": 0.9622901916503906,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.002903068531304598,
|
|
"rewards/frontier_coverage_0": 0.13595542460680007,
|
|
"rewards/frontier_coverage_1": 0.13595542460680007,
|
|
"rewards/frontier_coverage_10": 0.13595542460680007,
|
|
"rewards/frontier_coverage_15": 0.13595542460680007,
|
|
"rewards/frontier_coverage_20": 0.13595542460680007,
|
|
"rewards/frontier_coverage_25": 0.13595542460680007,
|
|
"rewards/frontier_coverage_5": 0.13595542460680007,
|
|
"rewards/frontier_ece_reward": 0.015835122019052506,
|
|
"rewards/frontier_entropy_batch_reward": -0.22671036124229432,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.110052490234375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.175390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.14299680292606354,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.596875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0550262451171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0550262451171875,
|
|
"signal/advantage_abs_mean": 0.07607890367507934,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07607890367507934,
|
|
"signal/advantage_pre_scale_std": 0.11513545215129853,
|
|
"signal/advantage_std": 0.11513545215129853,
|
|
"signal/brier_reward/centered_abs_mean": 0.15754351615905762,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85390625,
|
|
"signal/brier_reward/group_std_mean": 0.2009361833333969,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015754351764917372,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015754351764917372,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014543581008911132,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.884375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018578647449612618,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014543581288307904,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014543581288307904,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002373543428257108,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.753515625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0036731195170432327,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.966929350805003e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.966929350805003e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.20907978415489198,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2643455803394318,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.20907978415489198,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2643455803394318,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.20907978415489198,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2643455803394318,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.20907978415489198,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2643455803394318,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.20907978415489198,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2643455803394318,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.20907978415489198,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2643455803394318,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.20907978415489198,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2643455803394318,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026134973857551815,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03194341510534286,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.793359375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04675339683890343,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003194341529160738,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003194341529160738,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29024515151977537,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36637923717498777,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029024516791105272,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029024516791105272,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3880178279199678,
|
|
"calibration/batch_distribution_entropy": 0.976342214061056,
|
|
"calibration/batch_entropy_100bins": 0.947550732636436,
|
|
"calibration/batch_entropy_10bins": 0.976342214061056,
|
|
"calibration/batch_entropy_50bins": 0.9675931845241565,
|
|
"calibration/batch_uniqueness": 0.9630584716796875,
|
|
"calibration/buffer_distribution_entropy": 0.9963760938673006,
|
|
"calibration/buffer_entropy_100bins": 0.9822785458994316,
|
|
"calibration/buffer_entropy_10bins": 0.9963760938673006,
|
|
"calibration/buffer_entropy_50bins": 0.9900233021144178,
|
|
"calibration/confidence_entropy": 0.4652751317872042,
|
|
"calibration/coverage@0%": 0.00859375,
|
|
"calibration/coverage@1%": 0.00859375,
|
|
"calibration/coverage@10%": 0.043359375,
|
|
"calibration/coverage@15%": 0.08515625,
|
|
"calibration/coverage@20%": 0.165234375,
|
|
"calibration/coverage@25%": 0.320703125,
|
|
"calibration/coverage@30%": 0.365234375,
|
|
"calibration/coverage@5%": 0.019921875,
|
|
"calibration/ece": 0.1469109474352704,
|
|
"calibration/mean_confidence": 0.5193943627209796,
|
|
"calibration/prompt_uniqueness": 0.88134765625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 970.2,
|
|
"completions/max_terminated_length": 616.4,
|
|
"completions/mean_length": 163.323828125,
|
|
"completions/mean_terminated_length": 163.05493774414063,
|
|
"completions/min_length": 70.4,
|
|
"completions/min_terminated_length": 70.4,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0010371602838858962,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 382261351.0,
|
|
"reward": 0.9131480097770691,
|
|
"reward_std": 0.09592696875333787,
|
|
"rewards/accuracy_reward": 0.5,
|
|
"rewards/brier_reward": 0.771314287185669,
|
|
"rewards/confidence_uniqueness_reward": 0.9649186968803406,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0033288683742284777,
|
|
"rewards/frontier_coverage_0": 0.1177740141749382,
|
|
"rewards/frontier_coverage_1": 0.1177740141749382,
|
|
"rewards/frontier_coverage_10": 0.1177740141749382,
|
|
"rewards/frontier_coverage_15": 0.1177740141749382,
|
|
"rewards/frontier_coverage_20": 0.1177740141749382,
|
|
"rewards/frontier_coverage_25": 0.1177740141749382,
|
|
"rewards/frontier_coverage_5": 0.1177740141749382,
|
|
"rewards/frontier_ece_reward": 0.014998926036059856,
|
|
"rewards/frontier_entropy_batch_reward": -0.22141122221946716,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1026611328125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.13475327789783478,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05133056640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05133056640625,
|
|
"signal/advantage_abs_mean": 0.07525163143873215,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07525163143873215,
|
|
"signal/advantage_pre_scale_std": 0.11405473798513413,
|
|
"signal/advantage_std": 0.11405473798513413,
|
|
"signal/brier_reward/centered_abs_mean": 0.1544673502445221,
|
|
"signal/brier_reward/group_bin_occupancy": 0.859375,
|
|
"signal/brier_reward/group_std_mean": 0.19688616693019867,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015446734987199307,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015446734987199307,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013924498483538628,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.863671875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01840968318283558,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001392449880950153,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001392449880950153,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003003736166283488,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.746484375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004610391240566969,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7546701423707415e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7546701423707415e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.19367235004901887,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24632689356803894,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19367235004901887,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24632689356803894,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19367235004901887,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24632689356803894,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19367235004901887,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24632689356803894,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19367235004901887,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24632689356803894,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19367235004901887,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24632689356803894,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19367235004901887,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24632689356803894,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024209044873714446,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03021877408027649,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.82109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.042821260541677474,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030218774918466806,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030218774918466806,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2965745747089386,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37327985763549804,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029657458886504173,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029657458886504173,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3511573014841358,
|
|
"calibration/batch_distribution_entropy": 0.9747074938964195,
|
|
"calibration/batch_entropy_100bins": 0.9529573262726035,
|
|
"calibration/batch_entropy_10bins": 0.9747074938964195,
|
|
"calibration/batch_entropy_50bins": 0.9709856182741701,
|
|
"calibration/batch_uniqueness": 0.961004638671875,
|
|
"calibration/buffer_distribution_entropy": 0.9985135010971646,
|
|
"calibration/buffer_entropy_100bins": 0.9882330495239049,
|
|
"calibration/buffer_entropy_10bins": 0.9985135010971646,
|
|
"calibration/buffer_entropy_50bins": 0.99400929049607,
|
|
"calibration/confidence_entropy": 0.4714483178300538,
|
|
"calibration/coverage@0%": 0.0078125,
|
|
"calibration/coverage@1%": 0.0078125,
|
|
"calibration/coverage@10%": 0.135546875,
|
|
"calibration/coverage@15%": 0.205859375,
|
|
"calibration/coverage@20%": 0.29765625,
|
|
"calibration/coverage@25%": 0.34453125,
|
|
"calibration/coverage@30%": 0.398828125,
|
|
"calibration/coverage@5%": 0.04921875,
|
|
"calibration/ece": 0.1429656466704134,
|
|
"calibration/mean_confidence": 0.4671164953742596,
|
|
"calibration/prompt_uniqueness": 0.870458984375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 953.2,
|
|
"completions/max_terminated_length": 579.2,
|
|
"completions/mean_length": 162.4927734375,
|
|
"completions/mean_terminated_length": 162.2256286621094,
|
|
"completions/min_length": 69.4,
|
|
"completions/min_terminated_length": 69.4,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0010119343642145395,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 398781789.0,
|
|
"reward": 0.9290378093719482,
|
|
"reward_std": 0.09455136507749558,
|
|
"rewards/accuracy_reward": 0.530859375,
|
|
"rewards/brier_reward": 0.7825330376625061,
|
|
"rewards/confidence_uniqueness_reward": 0.9647130966186523,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.003093740437179804,
|
|
"rewards/frontier_coverage_0": 0.10442648828029633,
|
|
"rewards/frontier_coverage_1": 0.10442648828029633,
|
|
"rewards/frontier_coverage_10": 0.10442648828029633,
|
|
"rewards/frontier_coverage_15": 0.10442648828029633,
|
|
"rewards/frontier_coverage_20": 0.10442648828029633,
|
|
"rewards/frontier_coverage_25": 0.10442648828029633,
|
|
"rewards/frontier_coverage_5": 0.10442648828029633,
|
|
"rewards/frontier_ece_reward": 0.013694177567958831,
|
|
"rewards/frontier_entropy_batch_reward": -0.21486915349960328,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10360107421875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13945001363754272,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051800537109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051800537109375,
|
|
"signal/advantage_abs_mean": 0.07290669530630112,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07290669530630112,
|
|
"signal/advantage_pre_scale_std": 0.11290555596351623,
|
|
"signal/advantage_std": 0.11290555596351623,
|
|
"signal/brier_reward/centered_abs_mean": 0.13870272636413575,
|
|
"signal/brier_reward/group_bin_occupancy": 0.841796875,
|
|
"signal/brier_reward/group_std_mean": 0.1802999347448349,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013870272599160672,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013870272599160672,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013296573236584664,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.883203125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017479157820343972,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013296573655679823,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013296573655679823,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002974971802905202,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73203125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004687594994902611,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.718714833667036e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.718714833667036e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18035527765750886,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2348244309425354,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18035527765750886,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2348244309425354,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18035527765750886,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2348244309425354,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18035527765750886,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2348244309425354,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18035527765750886,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2348244309425354,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18035527765750886,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2348244309425354,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18035527765750886,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2348244309425354,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022544410079717637,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02462676987051964,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.832421875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03513662964105606,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002462677052244544,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002462677052244544,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28050180673599245,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35507087111473085,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02805018164217472,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02805018164217472,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.42825535009099047,
|
|
"calibration/batch_distribution_entropy": 0.9843865680153211,
|
|
"calibration/batch_entropy_100bins": 0.9538506842868667,
|
|
"calibration/batch_entropy_10bins": 0.9843865680153211,
|
|
"calibration/batch_entropy_50bins": 0.9709620915826056,
|
|
"calibration/batch_uniqueness": 0.9653472900390625,
|
|
"calibration/buffer_distribution_entropy": 0.9992208380784964,
|
|
"calibration/buffer_entropy_100bins": 0.9907419869998911,
|
|
"calibration/buffer_entropy_10bins": 0.9992208380784964,
|
|
"calibration/buffer_entropy_50bins": 0.9958122546116293,
|
|
"calibration/confidence_entropy": 0.5159266248918248,
|
|
"calibration/coverage@0%": 0.00390625,
|
|
"calibration/coverage@1%": 0.00390625,
|
|
"calibration/coverage@10%": 0.010546875,
|
|
"calibration/coverage@15%": 0.016796875,
|
|
"calibration/coverage@20%": 0.04375,
|
|
"calibration/coverage@25%": 0.080859375,
|
|
"calibration/coverage@30%": 0.129296875,
|
|
"calibration/coverage@5%": 0.00390625,
|
|
"calibration/ece": 0.14866836772807127,
|
|
"calibration/mean_confidence": 0.512600992886363,
|
|
"calibration/prompt_uniqueness": 0.88505859375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 912.2,
|
|
"completions/max_terminated_length": 671.4,
|
|
"completions/mean_length": 163.5146484375,
|
|
"completions/mean_terminated_length": 163.24703369140624,
|
|
"completions/min_length": 72.4,
|
|
"completions/min_terminated_length": 72.4,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.0015616186428815126,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 415492627.0,
|
|
"reward": 0.9132670998573303,
|
|
"reward_std": 0.10271851271390915,
|
|
"rewards/accuracy_reward": 0.50390625,
|
|
"rewards/brier_reward": 0.7699137806892395,
|
|
"rewards/confidence_uniqueness_reward": 0.9647279858589173,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.003537365049123764,
|
|
"rewards/frontier_coverage_0": 0.10254341214895249,
|
|
"rewards/frontier_coverage_1": 0.10254341214895249,
|
|
"rewards/frontier_coverage_10": 0.10254341214895249,
|
|
"rewards/frontier_coverage_15": 0.10254341214895249,
|
|
"rewards/frontier_coverage_20": 0.10254341214895249,
|
|
"rewards/frontier_coverage_25": 0.10254341214895249,
|
|
"rewards/frontier_coverage_5": 0.10254341214895249,
|
|
"rewards/frontier_ece_reward": 0.010206561535596848,
|
|
"rewards/frontier_entropy_batch_reward": -0.21855055093765258,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11923828125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.180859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.15680868923664093,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059619140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.059619140625,
|
|
"signal/advantage_abs_mean": 0.07999172508716583,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07999172508716583,
|
|
"signal/advantage_pre_scale_std": 0.1227585643529892,
|
|
"signal/advantage_std": 0.1227585643529892,
|
|
"signal/brier_reward/centered_abs_mean": 0.1432872533798218,
|
|
"signal/brier_reward/group_bin_occupancy": 0.874609375,
|
|
"signal/brier_reward/group_std_mean": 0.1836364448070526,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014328726008534432,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014328726008534432,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01301488820463419,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87578125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018223760277032854,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013014888390898705,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013014888390898705,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0033591561019420623,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7171875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0054647172801196575,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1989451710833235e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1989451710833235e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1780136674642563,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2294588565826416,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1780136674642563,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2294588565826416,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1780136674642563,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2294588565826416,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1780136674642563,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2294588565826416,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1780136674642563,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2294588565826416,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1780136674642563,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2294588565826416,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1780136674642563,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2294588565826416,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022251708433032036,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.020605326071381568,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83828125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.029997162893414496,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0020605326164513825,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0020605326164513825,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2866648018360138,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.751171875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36136451959609983,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028666481375694275,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028666481375694275,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32033459954652566,
|
|
"calibration/batch_distribution_entropy": 0.9757990834357446,
|
|
"calibration/batch_entropy_100bins": 0.9516993202749005,
|
|
"calibration/batch_entropy_10bins": 0.9757990834357446,
|
|
"calibration/batch_entropy_50bins": 0.969603238359294,
|
|
"calibration/batch_uniqueness": 0.964125328319047,
|
|
"calibration/buffer_distribution_entropy": 0.9992577401022663,
|
|
"calibration/buffer_entropy_100bins": 0.990822037253148,
|
|
"calibration/buffer_entropy_10bins": 0.9992577401022663,
|
|
"calibration/buffer_entropy_50bins": 0.9958939450746145,
|
|
"calibration/confidence_entropy": 0.5194667423516511,
|
|
"calibration/coverage@0%": 0.0027366682974559685,
|
|
"calibration/coverage@1%": 0.0027366682974559685,
|
|
"calibration/coverage@10%": 0.025431139921722114,
|
|
"calibration/coverage@15%": 0.092284582925636,
|
|
"calibration/coverage@20%": 0.14117158721365258,
|
|
"calibration/coverage@25%": 0.2542489735620276,
|
|
"calibration/coverage@30%": 0.43019097621925484,
|
|
"calibration/coverage@5%": 0.0027366682974559685,
|
|
"calibration/ece": 0.11552021771290169,
|
|
"calibration/mean_confidence": 0.5210242359988329,
|
|
"calibration/prompt_uniqueness": 0.8861906174575818,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 1388.2,
|
|
"completions/max_terminated_length": 1059.2,
|
|
"completions/mean_length": 172.62021484375,
|
|
"completions/mean_terminated_length": 171.42176208496093,
|
|
"completions/min_length": 73.6,
|
|
"completions/min_terminated_length": 73.6,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0010845692595466971,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0021,
|
|
"num_tokens": 432141442.0,
|
|
"reward": 0.9234651088714599,
|
|
"reward_std": 0.10108603686094284,
|
|
"rewards/accuracy_reward": 0.518359375,
|
|
"rewards/brier_reward": 0.7800793528556824,
|
|
"rewards/confidence_uniqueness_reward": 0.9633147358894348,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.00315277217887342,
|
|
"rewards/frontier_coverage_0": 0.10128591805696488,
|
|
"rewards/frontier_coverage_1": 0.10128591805696488,
|
|
"rewards/frontier_coverage_10": 0.10128591805696488,
|
|
"rewards/frontier_coverage_15": 0.10128591805696488,
|
|
"rewards/frontier_coverage_20": 0.10128591805696488,
|
|
"rewards/frontier_coverage_25": 0.10118604749441147,
|
|
"rewards/frontier_coverage_5": 0.10128591805696488,
|
|
"rewards/frontier_ece_reward": 0.01040429063141346,
|
|
"rewards/frontier_entropy_batch_reward": -0.1947682112455368,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.11812744140625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.178515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15311342775821685,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.059063720703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.059063720703125,
|
|
"signal/advantage_abs_mean": 0.07872487008571624,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07872487008571624,
|
|
"signal/advantage_pre_scale_std": 0.1202880859375,
|
|
"signal/advantage_std": 0.1202880859375,
|
|
"signal/brier_reward/centered_abs_mean": 0.14082336127758027,
|
|
"signal/brier_reward/group_bin_occupancy": 0.875,
|
|
"signal/brier_reward/group_std_mean": 0.17933386862277984,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01408233605325222,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01408233605325222,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013442078977823258,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.888671875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01916816532611847,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013442079536616803,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013442079536616803,
|
|
"signal/format_reward/centered_abs_mean": 0.001678466796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.127734375,
|
|
"signal/format_reward/group_std_mean": 0.004299227613955736,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008392333984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008392333984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031103747431188824,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005056559341028333,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.887968414346687e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.887968414346687e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18912857472896577,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23899484276771546,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18912857472896577,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23899484276771546,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18912857472896577,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23899484276771546,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18912857472896577,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23899484276771546,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18912857472896577,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23899484276771546,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.18864355981349945,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2383899211883545,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0023580444511026146,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023580444511026146,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18912857472896577,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23899484276771546,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002364107267931104,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.018303705751895903,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.843359375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.026779073104262353,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001830370631068945,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001830370631068945,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27329595685005187,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73984375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3500793755054474,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027329596504569054,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027329596504569054,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2711466896875542,
|
|
"calibration/batch_distribution_entropy": 0.9824704911633383,
|
|
"calibration/batch_entropy_100bins": 0.957537800410772,
|
|
"calibration/batch_entropy_10bins": 0.9824704911633383,
|
|
"calibration/batch_entropy_50bins": 0.9750580937567286,
|
|
"calibration/batch_uniqueness": 0.964057967535398,
|
|
"calibration/buffer_distribution_entropy": 0.9990826808437717,
|
|
"calibration/buffer_entropy_100bins": 0.9903883943102378,
|
|
"calibration/buffer_entropy_10bins": 0.9990826808437717,
|
|
"calibration/buffer_entropy_50bins": 0.9955856525305034,
|
|
"calibration/confidence_entropy": 0.4754514819609604,
|
|
"calibration/coverage@0%": 0.005876225490196079,
|
|
"calibration/coverage@1%": 0.005876225490196079,
|
|
"calibration/coverage@10%": 0.07948835784313726,
|
|
"calibration/coverage@15%": 0.22814797794117644,
|
|
"calibration/coverage@20%": 0.3243229166666667,
|
|
"calibration/coverage@25%": 0.42829197303921573,
|
|
"calibration/coverage@30%": 0.5064721200980392,
|
|
"calibration/coverage@5%": 0.024699754901960785,
|
|
"calibration/ece": 0.11323366571529961,
|
|
"calibration/mean_confidence": 0.5354937992420586,
|
|
"calibration/prompt_uniqueness": 0.8668369638560094,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 948.8,
|
|
"completions/max_terminated_length": 640.8,
|
|
"completions/mean_length": 176.71396484375,
|
|
"completions/mean_terminated_length": 176.18282775878907,
|
|
"completions/min_length": 83.8,
|
|
"completions/min_terminated_length": 83.8,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0012650451390072703,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 448965329.0,
|
|
"reward": 0.9427853345870971,
|
|
"reward_std": 0.09141346216201782,
|
|
"rewards/accuracy_reward": 0.5541015625,
|
|
"rewards/brier_reward": 0.795646071434021,
|
|
"rewards/confidence_uniqueness_reward": 0.9624568223953247,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.002685644570738077,
|
|
"rewards/frontier_coverage_0": 0.10276300571858883,
|
|
"rewards/frontier_coverage_1": 0.10276300571858883,
|
|
"rewards/frontier_coverage_10": 0.10276300571858883,
|
|
"rewards/frontier_coverage_15": 0.10276300571858883,
|
|
"rewards/frontier_coverage_20": 0.10276300571858883,
|
|
"rewards/frontier_coverage_25": 0.10133399069309235,
|
|
"rewards/frontier_coverage_5": 0.10276300571858883,
|
|
"rewards/frontier_ece_reward": 0.012095463648438453,
|
|
"rewards/frontier_entropy_batch_reward": -0.2003028452396393,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10240478515625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.173046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13419998735189437,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.615625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051202392578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051202392578125,
|
|
"signal/advantage_abs_mean": 0.07144368439912796,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07144368439912796,
|
|
"signal/advantage_pre_scale_std": 0.11116426140069961,
|
|
"signal/advantage_std": 0.11116426140069961,
|
|
"signal/brier_reward/centered_abs_mean": 0.1337427169084549,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84296875,
|
|
"signal/brier_reward/group_std_mean": 0.171070197224617,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013374271430075168,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013374271430075168,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013302310928702354,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.88203125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01829577349126339,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001330231106840074,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001330231106840074,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0022097086068242787,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003123843017965555,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.709765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0052942352835088965,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.904803670593537e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.904803670593537e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1775657594203949,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22634563744068145,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1775657594203949,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22634563744068145,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1775657594203949,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22634563744068145,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1775657594203949,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22634563744068145,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1775657594203949,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22634563744068145,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17443813383579254,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22237459123134612,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021804766729474068,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021804766729474068,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1775657594203949,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22634563744068145,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022195718716830014,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.017705311998724937,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.836328125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.025692766532301903,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017705312930047512,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017705312930047512,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27649489641189573,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3526521801948547,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027649490535259245,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027649490535259245,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3010584308084431,
|
|
"calibration/batch_distribution_entropy": 0.9641241091381325,
|
|
"calibration/batch_entropy_100bins": 0.9505054970417863,
|
|
"calibration/batch_entropy_10bins": 0.9641241091381325,
|
|
"calibration/batch_entropy_50bins": 0.9653212354537498,
|
|
"calibration/batch_uniqueness": 0.9634041782070479,
|
|
"calibration/buffer_distribution_entropy": 0.9983903237821101,
|
|
"calibration/buffer_entropy_100bins": 0.9894302549349414,
|
|
"calibration/buffer_entropy_10bins": 0.9983903237821101,
|
|
"calibration/buffer_entropy_50bins": 0.9949646771618358,
|
|
"calibration/confidence_entropy": 0.5043917074884562,
|
|
"calibration/coverage@0%": 0.00546875,
|
|
"calibration/coverage@1%": 0.00546875,
|
|
"calibration/coverage@10%": 0.032421875,
|
|
"calibration/coverage@15%": 0.092578125,
|
|
"calibration/coverage@20%": 0.1625,
|
|
"calibration/coverage@25%": 0.28093428938356163,
|
|
"calibration/coverage@30%": 0.4881788160469667,
|
|
"calibration/coverage@5%": 0.00546875,
|
|
"calibration/ece": 0.13595115217602646,
|
|
"calibration/mean_confidence": 0.5649633774254619,
|
|
"calibration/prompt_uniqueness": 0.8865723164347035,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 563.2,
|
|
"completions/mean_length": 187.29921875,
|
|
"completions/mean_terminated_length": 186.64035034179688,
|
|
"completions/min_length": 76.4,
|
|
"completions/min_terminated_length": 76.4,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.00112549914047122,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 465836073.0,
|
|
"reward": 0.9279234051704407,
|
|
"reward_std": 0.09372627437114715,
|
|
"rewards/accuracy_reward": 0.52568359375,
|
|
"rewards/brier_reward": 0.7868773221969605,
|
|
"rewards/confidence_uniqueness_reward": 0.9617484331130981,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0032899423968046904,
|
|
"rewards/frontier_coverage_0": 0.10912051647901536,
|
|
"rewards/frontier_coverage_1": 0.10912051647901536,
|
|
"rewards/frontier_coverage_10": 0.10912051647901536,
|
|
"rewards/frontier_coverage_15": 0.10912051647901536,
|
|
"rewards/frontier_coverage_20": 0.10912051647901536,
|
|
"rewards/frontier_coverage_25": 0.10755196064710618,
|
|
"rewards/frontier_coverage_5": 0.10912051647901536,
|
|
"rewards/frontier_ece_reward": 0.010184999741613864,
|
|
"rewards/frontier_entropy_batch_reward": -0.19944992065429687,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.107452392578125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1377037927508354,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0537261962890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0537261962890625,
|
|
"signal/advantage_abs_mean": 0.0733189657330513,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0733189657330513,
|
|
"signal/advantage_pre_scale_std": 0.11423833519220353,
|
|
"signal/advantage_std": 0.11423833519220353,
|
|
"signal/brier_reward/centered_abs_mean": 0.14163122177124024,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85234375,
|
|
"signal/brier_reward/group_std_mean": 0.18140933215618132,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01416312251240015,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01416312251240015,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013178028725087642,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.90078125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01842593662440777,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013178028631955386,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013178028631955386,
|
|
"signal/format_reward/centered_abs_mean": 0.001312255859375,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0035306816920638085,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006561279296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003526174183934927,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.690234375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006031551398336887,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.40771778812632e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.40771778812632e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18577166497707367,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.23553779423236848,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18577166497707367,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23553779423236848,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18577166497707367,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23553779423236848,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18577166497707367,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23553779423236848,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18577166497707367,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23553779423236848,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17763153314590455,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.878125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22553324997425078,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022203943226486444,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022203943226486444,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18577166497707367,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23553779423236848,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023221459705382586,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.017202311754226686,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.82265625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.025163047760725022,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017202311893925071,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017202311893925071,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2695195287466049,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75234375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34306603074073794,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026951952651143075,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026951952651143075,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3905655690563325,
|
|
"calibration/batch_distribution_entropy": 0.9803796350881079,
|
|
"calibration/batch_entropy_100bins": 0.960040144438605,
|
|
"calibration/batch_entropy_10bins": 0.9803796350881079,
|
|
"calibration/batch_entropy_50bins": 0.9728294233983587,
|
|
"calibration/batch_uniqueness": 0.9602142333984375,
|
|
"calibration/buffer_distribution_entropy": 0.9979704423206602,
|
|
"calibration/buffer_entropy_100bins": 0.988638040636473,
|
|
"calibration/buffer_entropy_10bins": 0.9979704423206602,
|
|
"calibration/buffer_entropy_50bins": 0.9945346302719053,
|
|
"calibration/confidence_entropy": 0.5143790373022907,
|
|
"calibration/coverage@0%": 0.008984375,
|
|
"calibration/coverage@1%": 0.008984375,
|
|
"calibration/coverage@10%": 0.016015625,
|
|
"calibration/coverage@15%": 0.03046875,
|
|
"calibration/coverage@20%": 0.125,
|
|
"calibration/coverage@25%": 0.183984375,
|
|
"calibration/coverage@30%": 0.294921875,
|
|
"calibration/coverage@5%": 0.008984375,
|
|
"calibration/ece": 0.13917155590776573,
|
|
"calibration/mean_confidence": 0.5079795467692557,
|
|
"calibration/prompt_uniqueness": 0.87216796875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1347.4,
|
|
"completions/max_terminated_length": 1031.8,
|
|
"completions/mean_length": 191.52421875,
|
|
"completions/mean_terminated_length": 190.86587219238282,
|
|
"completions/min_length": 85.0,
|
|
"completions/min_terminated_length": 85.0,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0007277204422280192,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 482968097.0,
|
|
"reward": 0.8982602834701539,
|
|
"reward_std": 0.08826989978551865,
|
|
"rewards/accuracy_reward": 0.46787109375,
|
|
"rewards/brier_reward": 0.7684149622917176,
|
|
"rewards/confidence_uniqueness_reward": 0.959271764755249,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0039596253540366885,
|
|
"rewards/frontier_coverage_0": 0.1321122795343399,
|
|
"rewards/frontier_coverage_1": 0.1321122795343399,
|
|
"rewards/frontier_coverage_10": 0.1321122795343399,
|
|
"rewards/frontier_coverage_15": 0.1321122795343399,
|
|
"rewards/frontier_coverage_20": 0.1321122795343399,
|
|
"rewards/frontier_coverage_25": 0.12441358044743538,
|
|
"rewards/frontier_coverage_5": 0.1321122795343399,
|
|
"rewards/frontier_ece_reward": 0.007002122979611158,
|
|
"rewards/frontier_entropy_batch_reward": -0.2031411647796631,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085882568359375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1213410884141922,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0429412841796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0429412841796875,
|
|
"signal/advantage_abs_mean": 0.06655814126133919,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06655814126133919,
|
|
"signal/advantage_pre_scale_std": 0.10574809014797211,
|
|
"signal/advantage_std": 0.10574809014797211,
|
|
"signal/brier_reward/centered_abs_mean": 0.14038530886173248,
|
|
"signal/brier_reward/group_bin_occupancy": 0.861328125,
|
|
"signal/brier_reward/group_std_mean": 0.1817769706249237,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014038531482219696,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014038531482219696,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013213860616087914,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.916796875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018184344843029977,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013213861035183071,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013213861035183071,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.002762135770171881,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036213297862559557,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.691796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.006112007796764374,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.526662451098673e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.526662451098673e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17165872752666472,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22206704020500184,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17165872752666472,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22206704020500184,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17165872752666472,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22206704020500184,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17165872752666472,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22206704020500184,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17165872752666472,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22206704020500184,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16017631590366363,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87734375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20728962421417235,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020022039767354726,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020022039767354726,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17165872752666472,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22206704020500184,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002145734056830406,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01575020458549261,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.818359375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02388475425541401,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015750204911455512,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015750204911455512,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2742986440658569,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7421875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34781610369682314,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027429865673184395,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027429865673184395,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2997569453896353,
|
|
"calibration/batch_distribution_entropy": 0.9792118212906589,
|
|
"calibration/batch_entropy_100bins": 0.9606337149903614,
|
|
"calibration/batch_entropy_10bins": 0.9792118212906589,
|
|
"calibration/batch_entropy_50bins": 0.9712305855926182,
|
|
"calibration/batch_uniqueness": 0.9567291259765625,
|
|
"calibration/buffer_distribution_entropy": 0.9978132956107622,
|
|
"calibration/buffer_entropy_100bins": 0.9881803114059542,
|
|
"calibration/buffer_entropy_10bins": 0.9978132956107622,
|
|
"calibration/buffer_entropy_50bins": 0.9942871392551943,
|
|
"calibration/confidence_entropy": 0.4884862201915487,
|
|
"calibration/coverage@0%": 0.0109375,
|
|
"calibration/coverage@1%": 0.0109375,
|
|
"calibration/coverage@10%": 0.016015625,
|
|
"calibration/coverage@15%": 0.1421875,
|
|
"calibration/coverage@20%": 0.287109375,
|
|
"calibration/coverage@25%": 0.38203125,
|
|
"calibration/coverage@30%": 0.556640625,
|
|
"calibration/coverage@5%": 0.0125,
|
|
"calibration/ece": 0.13674428667727873,
|
|
"calibration/mean_confidence": 0.509401226049053,
|
|
"calibration/prompt_uniqueness": 0.86796875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 947.6,
|
|
"completions/mean_length": 194.65380859375,
|
|
"completions/mean_terminated_length": 193.9985321044922,
|
|
"completions/min_length": 86.8,
|
|
"completions/min_terminated_length": 86.8,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.0011819824576377869,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0015,
|
|
"num_tokens": 500009384.0,
|
|
"reward": 0.9241943120956421,
|
|
"reward_std": 0.09635329693555832,
|
|
"rewards/accuracy_reward": 0.51845703125,
|
|
"rewards/brier_reward": 0.7792062282562255,
|
|
"rewards/confidence_uniqueness_reward": 0.9568382143974304,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0031862443778663875,
|
|
"rewards/frontier_coverage_0": 0.11428727954626083,
|
|
"rewards/frontier_coverage_1": 0.11428727954626083,
|
|
"rewards/frontier_coverage_10": 0.11428727954626083,
|
|
"rewards/frontier_coverage_15": 0.11428727954626083,
|
|
"rewards/frontier_coverage_20": 0.11371297538280487,
|
|
"rewards/frontier_coverage_25": 0.1081900030374527,
|
|
"rewards/frontier_coverage_5": 0.11428727954626083,
|
|
"rewards/frontier_ece_reward": 0.007919181045144797,
|
|
"rewards/frontier_entropy_batch_reward": -0.19063332974910735,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.120074462890625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.18046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.15685472190380095,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0600372314453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0600372314453125,
|
|
"signal/advantage_abs_mean": 0.07497897297143936,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07497897297143936,
|
|
"signal/advantage_pre_scale_std": 0.11704835444688796,
|
|
"signal/advantage_std": 0.11704835444688796,
|
|
"signal/brier_reward/centered_abs_mean": 0.13743520379066468,
|
|
"signal/brier_reward/group_bin_occupancy": 0.838671875,
|
|
"signal/brier_reward/group_std_mean": 0.17770840525627135,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013743520341813564,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013743520341813564,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01321981344372034,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93359375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018044329062104226,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001321981381624937,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001321981381624937,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0031342420261353254,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70390625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005184091906994581,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.917802387150004e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.917802387150004e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.18790066838264466,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.24077147245407104,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18790066838264466,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24077147245407104,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18790066838264466,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24077147245407104,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18790066838264466,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24077147245407104,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18661079108715056,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23916022181510926,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002332634944468737,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002332634944468737,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17039817869663237,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.858203125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.21902235150337218,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021299772663041948,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021299772663041948,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18790066838264466,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24077147245407104,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023487584665417673,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.014475966058671474,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.822265625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.022006630897521973,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0014475966105237602,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0014475966105237602,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26103139519691465,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.746484375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3364805102348328,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02610314004123211,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02610314004123211,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.4928207956337317,
|
|
"eval_calibration/batch_distribution_entropy": 0.9448460828266618,
|
|
"eval_calibration/batch_entropy_100bins": 0.7140585686804589,
|
|
"eval_calibration/batch_entropy_10bins": 0.9448460828266618,
|
|
"eval_calibration/batch_entropy_50bins": 0.7938259760149764,
|
|
"eval_calibration/batch_uniqueness": 0.9052734375,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9977152222728605,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9880421883831635,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9977152222728605,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9941686668937713,
|
|
"eval_calibration/confidence_entropy": 0.4849995680216249,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.046875,
|
|
"eval_calibration/coverage@15%": 0.046875,
|
|
"eval_calibration/coverage@20%": 0.09375,
|
|
"eval_calibration/coverage@25%": 0.1484375,
|
|
"eval_calibration/coverage@30%": 0.1484375,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.22999326971199952,
|
|
"eval_calibration/mean_confidence": 0.4614743513558307,
|
|
"eval_calibration/prompt_uniqueness": 0.9052734375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 434.5,
|
|
"eval_completions/max_terminated_length": 434.5,
|
|
"eval_completions/mean_length": 194.3330421447754,
|
|
"eval_completions/mean_terminated_length": 194.3330421447754,
|
|
"eval_completions/min_length": 98.0,
|
|
"eval_completions/min_terminated_length": 98.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 500009384.0,
|
|
"eval_reward": 0.7936547994613647,
|
|
"eval_reward_std": 0.2236923649907112,
|
|
"eval_rewards/accuracy_reward": 0.416015625,
|
|
"eval_rewards/brier_reward": 0.7854552268981934,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.904296875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0038128122105263174,
|
|
"eval_rewards/frontier_coverage_0": 0.18677300214767456,
|
|
"eval_rewards/frontier_coverage_1": 0.18677300214767456,
|
|
"eval_rewards/frontier_coverage_10": 0.18677300214767456,
|
|
"eval_rewards/frontier_coverage_15": 0.18677300214767456,
|
|
"eval_rewards/frontier_coverage_20": 0.18603158369660378,
|
|
"eval_rewards/frontier_coverage_25": 0.1658840924501419,
|
|
"eval_rewards/frontier_coverage_5": 0.18677300214767456,
|
|
"eval_rewards/frontier_ece_reward": 0.0064718994544819,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 22.8621,
|
|
"eval_samples_per_second": 21.87,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4705810546875,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49238111078739166,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23529052734375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23529052734375,
|
|
"eval_signal/advantage_abs_mean": 0.20876647159457207,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20876647159457207,
|
|
"eval_signal/advantage_pre_scale_std": 0.22128642722964287,
|
|
"eval_signal/advantage_std": 0.22128642722964287,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.1912023350596428,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.890625,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2429308146238327,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019120234064757824,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.019120234064757824,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0374603271484375,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.390625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04364745691418648,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003746032773051411,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003746032773051411,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004567834781482816,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6640625,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008484951569698751,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7097938224615064e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7097938224615064e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.35640130937099457,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4366024136543274,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.35640130937099457,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4366024136543274,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.35640130937099457,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4366024136543274,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.35640130937099457,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4366024136543274,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.35413555800914764,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4339291825890541,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044266944751143456,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044266944751143456,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.31332943588495255,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.3852032795548439,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003916618006769568,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003916618006769568,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.35640130937099457,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4366024136543274,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044550164602696896,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013189757708460093,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.90625,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.018085308838635683,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013189757883083075,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013189757883083075,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.175,
|
|
"step": 150
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3905210632456232,
|
|
"calibration/batch_distribution_entropy": 0.9905343495592323,
|
|
"calibration/batch_entropy_100bins": 0.9699764400711286,
|
|
"calibration/batch_entropy_10bins": 0.9905343495592323,
|
|
"calibration/batch_entropy_50bins": 0.97994022779757,
|
|
"calibration/batch_uniqueness": 0.9584047876376637,
|
|
"calibration/buffer_distribution_entropy": 0.9977835093627524,
|
|
"calibration/buffer_entropy_100bins": 0.9880724924738754,
|
|
"calibration/buffer_entropy_10bins": 0.9977835093627524,
|
|
"calibration/buffer_entropy_50bins": 0.9941551940853858,
|
|
"calibration/confidence_entropy": 0.500662742867437,
|
|
"calibration/coverage@0%": 0.005078889432485323,
|
|
"calibration/coverage@1%": 0.005078889432485323,
|
|
"calibration/coverage@10%": 0.08125076443248533,
|
|
"calibration/coverage@15%": 0.1417976394324853,
|
|
"calibration/coverage@20%": 0.1953132644324853,
|
|
"calibration/coverage@25%": 0.2523613319471624,
|
|
"calibration/coverage@30%": 0.29494786570450093,
|
|
"calibration/coverage@5%": 0.03828201443248532,
|
|
"calibration/ece": 0.13589736223677878,
|
|
"calibration/mean_confidence": 0.49380390196528123,
|
|
"calibration/prompt_uniqueness": 0.8630719648315557,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0005859375,
|
|
"completions/max_length": 1201.8,
|
|
"completions/max_terminated_length": 791.8,
|
|
"completions/mean_length": 193.1357421875,
|
|
"completions/mean_terminated_length": 192.3479034423828,
|
|
"completions/min_length": 88.4,
|
|
"completions/min_terminated_length": 88.4,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.0008443639962933958,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0025,
|
|
"num_tokens": 517294934.0,
|
|
"reward": 0.9380232334136963,
|
|
"reward_std": 0.08917539864778519,
|
|
"rewards/accuracy_reward": 0.547265625,
|
|
"rewards/brier_reward": 0.7819605112075806,
|
|
"rewards/confidence_uniqueness_reward": 0.956698739528656,
|
|
"rewards/format_reward": 0.9994140625,
|
|
"rewards/frontier_aurc_reward": -0.002871061023324728,
|
|
"rewards/frontier_coverage_0": 0.09036671817302704,
|
|
"rewards/frontier_coverage_1": 0.09036671817302704,
|
|
"rewards/frontier_coverage_10": 0.09036671817302704,
|
|
"rewards/frontier_coverage_15": 0.09036671817302704,
|
|
"rewards/frontier_coverage_20": 0.08979679197072983,
|
|
"rewards/frontier_coverage_25": 0.075638347864151,
|
|
"rewards/frontier_coverage_5": 0.09036671817302704,
|
|
"rewards/frontier_ece_reward": 0.006273471284657717,
|
|
"rewards/frontier_entropy_batch_reward": -0.17489843368530272,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0958251953125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13047634959220886,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04791259765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04791259765625,
|
|
"signal/advantage_abs_mean": 0.06811611354351044,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06811611354351044,
|
|
"signal/advantage_pre_scale_std": 0.10838208794593811,
|
|
"signal/advantage_std": 0.10838208794593811,
|
|
"signal/brier_reward/centered_abs_mean": 0.13192115724086761,
|
|
"signal/brier_reward/group_bin_occupancy": 0.869140625,
|
|
"signal/brier_reward/group_std_mean": 0.168493589758873,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013192116282880306,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013192116282880306,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012967484071850777,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9390625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018038667924702167,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001296748430468142,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001296748430468142,
|
|
"signal/format_reward/centered_abs_mean": 0.00113525390625,
|
|
"signal/format_reward/group_bin_occupancy": 0.12734375,
|
|
"signal/format_reward/group_std_mean": 0.0033145629335194827,
|
|
"signal/format_reward/group_zero_std_frac": 0.98125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028141734655946493,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.696484375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004710181429982185,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.517716831993312e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.517716831993312e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16962937116622925,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2177934467792511,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16962937116622925,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2177934467792511,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16962937116622925,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2177934467792511,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16962937116622925,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2177934467792511,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16856757402420045,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2164437383413315,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021070946007966996,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021070946007966996,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.14662111103534697,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1890866458415985,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001832763897255063,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001832763897255063,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16962937116622925,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88203125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2177934467792511,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002120367041788995,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011038328520953655,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014819971285760403,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011038328986614943,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011038328986614943,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.256817501783371,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3358631134033203,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.025681750476360322,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025681750476360322,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31790763537690625,
|
|
"calibration/batch_distribution_entropy": 0.9855522887040239,
|
|
"calibration/batch_entropy_100bins": 0.9685646417212451,
|
|
"calibration/batch_entropy_10bins": 0.9855522887040239,
|
|
"calibration/batch_entropy_50bins": 0.9776787715207496,
|
|
"calibration/batch_uniqueness": 0.9575121754105227,
|
|
"calibration/buffer_distribution_entropy": 0.9980551921710852,
|
|
"calibration/buffer_entropy_100bins": 0.9885994859100894,
|
|
"calibration/buffer_entropy_10bins": 0.9980551921710852,
|
|
"calibration/buffer_entropy_50bins": 0.9943812246542099,
|
|
"calibration/confidence_entropy": 0.5099366283838993,
|
|
"calibration/coverage@0%": 0.023481837084148728,
|
|
"calibration/coverage@1%": 0.023481837084148728,
|
|
"calibration/coverage@10%": 0.19366820572407045,
|
|
"calibration/coverage@15%": 0.3277481347847358,
|
|
"calibration/coverage@20%": 0.3950319532778865,
|
|
"calibration/coverage@25%": 0.4333506604696673,
|
|
"calibration/coverage@30%": 0.4865199975538161,
|
|
"calibration/coverage@5%": 0.07631941046966731,
|
|
"calibration/ece": 0.137644847278309,
|
|
"calibration/mean_confidence": 0.5067424365660879,
|
|
"calibration/prompt_uniqueness": 0.870397403014438,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1051.6,
|
|
"completions/max_terminated_length": 664.6,
|
|
"completions/mean_length": 187.24072265625,
|
|
"completions/mean_terminated_length": 186.84423217773437,
|
|
"completions/min_length": 82.6,
|
|
"completions/min_terminated_length": 82.6,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.0013515661703422666,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 534357943.0,
|
|
"reward": 0.9389643549919129,
|
|
"reward_std": 0.08940067738294602,
|
|
"rewards/accuracy_reward": 0.5443359375,
|
|
"rewards/brier_reward": 0.8010304689407348,
|
|
"rewards/confidence_uniqueness_reward": 0.9571277260780334,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002591008087620139,
|
|
"rewards/frontier_coverage_0": 0.11089163199067116,
|
|
"rewards/frontier_coverage_1": 0.11089163199067116,
|
|
"rewards/frontier_coverage_10": 0.11089163199067116,
|
|
"rewards/frontier_coverage_15": 0.11089163199067116,
|
|
"rewards/frontier_coverage_20": 0.11040212810039521,
|
|
"rewards/frontier_coverage_25": 0.0975722998380661,
|
|
"rewards/frontier_coverage_5": 0.11089163199067116,
|
|
"rewards/frontier_ece_reward": 0.007399659510701895,
|
|
"rewards/frontier_entropy_batch_reward": -0.19110932052135468,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09991455078125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.173828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13368143737316132,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049957275390625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049957275390625,
|
|
"signal/advantage_abs_mean": 0.0693469613790512,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0693469613790512,
|
|
"signal/advantage_pre_scale_std": 0.10971838235855103,
|
|
"signal/advantage_std": 0.10971838235855103,
|
|
"signal/brier_reward/centered_abs_mean": 0.12356914579868317,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84921875,
|
|
"signal/brier_reward/group_std_mean": 0.16091051399707795,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012356914579868317,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012356914579868317,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011988498829305172,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.946484375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015966850332915783,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011988498736172915,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011988498736172915,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028497665654867886,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004724315833300352,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.562208294169977e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.562208294169977e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1604818731546402,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21088581383228303,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1604818731546402,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21088581383228303,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1604818731546402,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21088581383228303,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1604818731546402,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21088581383228303,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15974161326885222,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2099863260984421,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00199677012860775,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00199677012860775,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1347096398472786,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8546875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17761588990688323,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016838705167174339,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016838705167174339,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1604818731546402,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21088581383228303,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020060235168784858,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010506413504481315,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.89140625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01373392753303051,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010506413877010346,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010506413877010346,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2652657926082611,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.342242556810379,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02652658075094223,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02652658075094223,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2116557679486677,
|
|
"calibration/batch_distribution_entropy": 0.9857721529055761,
|
|
"calibration/batch_entropy_100bins": 0.9656648820603996,
|
|
"calibration/batch_entropy_10bins": 0.9857721529055761,
|
|
"calibration/batch_entropy_50bins": 0.9778444274441169,
|
|
"calibration/batch_uniqueness": 0.9589366543250588,
|
|
"calibration/buffer_distribution_entropy": 0.9982783670040604,
|
|
"calibration/buffer_entropy_100bins": 0.9890758203614105,
|
|
"calibration/buffer_entropy_10bins": 0.9982783670040604,
|
|
"calibration/buffer_entropy_50bins": 0.9945921630733402,
|
|
"calibration/confidence_entropy": 0.47936185596729713,
|
|
"calibration/coverage@0%": 0.03830418297455969,
|
|
"calibration/coverage@1%": 0.03830418297455969,
|
|
"calibration/coverage@10%": 0.27363090141878665,
|
|
"calibration/coverage@15%": 0.34475752201565557,
|
|
"calibration/coverage@20%": 0.5264394263698631,
|
|
"calibration/coverage@25%": 0.616313753669276,
|
|
"calibration/coverage@30%": 0.7604887781311154,
|
|
"calibration/coverage@5%": 0.17943141511741684,
|
|
"calibration/ece": 0.11594575333235538,
|
|
"calibration/mean_confidence": 0.5187620094786958,
|
|
"calibration/prompt_uniqueness": 0.8553659111602497,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1092.6,
|
|
"completions/max_terminated_length": 778.2,
|
|
"completions/mean_length": 187.25498046875,
|
|
"completions/mean_terminated_length": 186.8594207763672,
|
|
"completions/min_length": 85.4,
|
|
"completions/min_terminated_length": 85.4,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0011601398000493646,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 551304970.0,
|
|
"reward": 0.9393844962120056,
|
|
"reward_std": 0.08786453604698181,
|
|
"rewards/accuracy_reward": 0.54228515625,
|
|
"rewards/brier_reward": 0.8001392245292663,
|
|
"rewards/confidence_uniqueness_reward": 0.9570415496826172,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0026703955605626105,
|
|
"rewards/frontier_coverage_0": 0.12262420728802681,
|
|
"rewards/frontier_coverage_1": 0.12262420728802681,
|
|
"rewards/frontier_coverage_10": 0.12262420728802681,
|
|
"rewards/frontier_coverage_15": 0.12262420728802681,
|
|
"rewards/frontier_coverage_20": 0.12122518271207809,
|
|
"rewards/frontier_coverage_25": 0.10408189445734024,
|
|
"rewards/frontier_coverage_5": 0.12262420728802681,
|
|
"rewards/frontier_ece_reward": 0.008721418399363755,
|
|
"rewards/frontier_entropy_batch_reward": -0.18648791313171387,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.104351806640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.175,
|
|
"signal/accuracy_reward/group_std_mean": 0.1387265920639038,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521759033203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0521759033203125,
|
|
"signal/advantage_abs_mean": 0.06842133551836013,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06842133551836013,
|
|
"signal/advantage_pre_scale_std": 0.1080582544207573,
|
|
"signal/advantage_std": 0.1080582544207573,
|
|
"signal/brier_reward/centered_abs_mean": 0.12543393820524215,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84765625,
|
|
"signal/brier_reward/group_std_mean": 0.16112754940986634,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012543394230306149,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012543394230306149,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011928396113216878,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.95234375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01579369381070137,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011928396532312035,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011928396532312035,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027430617716163396,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004478739900514483,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.4288274036953226e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.4288274036953226e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17456578612327575,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.22599020898342131,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17456578612327575,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22599020898342131,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17456578612327575,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22599020898342131,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17456578612327575,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22599020898342131,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17225814163684844,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22308208048343658,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021532268263399603,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021532268263399603,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.139526429772377,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.85390625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1816743493080139,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017440804746001958,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017440804746001958,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17456578612327575,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22599020898342131,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021820723544806243,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013731100969016552,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.84921875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.022244062460958957,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013731101527810097,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013731101527810097,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25302750468254087,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3287863492965698,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02530275024473667,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02530275024473667,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22570365505708892,
|
|
"calibration/batch_distribution_entropy": 0.9843955404941772,
|
|
"calibration/batch_entropy_100bins": 0.9662716763828755,
|
|
"calibration/batch_entropy_10bins": 0.9843955404941772,
|
|
"calibration/batch_entropy_50bins": 0.9777331174175243,
|
|
"calibration/batch_uniqueness": 0.959429931640625,
|
|
"calibration/buffer_distribution_entropy": 0.9982544609568785,
|
|
"calibration/buffer_entropy_100bins": 0.9893260796188164,
|
|
"calibration/buffer_entropy_10bins": 0.9982544609568785,
|
|
"calibration/buffer_entropy_50bins": 0.9945346311546374,
|
|
"calibration/confidence_entropy": 0.4794441883496332,
|
|
"calibration/coverage@0%": 0.021875,
|
|
"calibration/coverage@1%": 0.05,
|
|
"calibration/coverage@10%": 0.196875,
|
|
"calibration/coverage@15%": 0.2703125,
|
|
"calibration/coverage@20%": 0.538671875,
|
|
"calibration/coverage@25%": 0.646484375,
|
|
"calibration/coverage@30%": 0.73046875,
|
|
"calibration/coverage@5%": 0.11171875,
|
|
"calibration/ece": 0.0953630593548969,
|
|
"calibration/mean_confidence": 0.5133342115984929,
|
|
"calibration/prompt_uniqueness": 0.8658203125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 717.8,
|
|
"completions/mean_length": 187.49580078125,
|
|
"completions/mean_terminated_length": 186.83703002929687,
|
|
"completions/min_length": 81.2,
|
|
"completions/min_terminated_length": 81.2,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.0014953252393752337,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0019,
|
|
"num_tokens": 568388511.0,
|
|
"reward": 0.9478225350379944,
|
|
"reward_std": 0.09547350853681565,
|
|
"rewards/accuracy_reward": 0.5732421875,
|
|
"rewards/brier_reward": 0.7815747022628784,
|
|
"rewards/confidence_uniqueness_reward": 0.957908034324646,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0027830671519041062,
|
|
"rewards/frontier_coverage_0": 0.07353707253932953,
|
|
"rewards/frontier_coverage_1": 0.07353707253932953,
|
|
"rewards/frontier_coverage_10": 0.07353707253932953,
|
|
"rewards/frontier_coverage_15": 0.07353707253932953,
|
|
"rewards/frontier_coverage_20": 0.07318145632743836,
|
|
"rewards/frontier_coverage_25": 0.058940806239843366,
|
|
"rewards/frontier_coverage_5": 0.07353707253932953,
|
|
"rewards/frontier_ece_reward": 0.0069223855622112754,
|
|
"rewards/frontier_entropy_batch_reward": -0.19407747387886048,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1157470703125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.182421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.15591520071029663,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.540625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05787353515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05787353515625,
|
|
"signal/advantage_abs_mean": 0.07254650890827179,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07254650890827179,
|
|
"signal/advantage_pre_scale_std": 0.11336593627929688,
|
|
"signal/advantage_std": 0.11336593627929688,
|
|
"signal/brier_reward/centered_abs_mean": 0.13599575757980348,
|
|
"signal/brier_reward/group_bin_occupancy": 0.855859375,
|
|
"signal/brier_reward/group_std_mean": 0.1739354431629181,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01359957605600357,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01359957605600357,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012918978370726109,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.931640625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017537206411361694,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012918978696689009,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012918978696689009,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.126953125,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028809635899960996,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.698046875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004857636988162994,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.601204407459591e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.601204407459591e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1849408507347107,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2365315616130829,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1849408507347107,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2365315616130829,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1849408507347107,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2365315616130829,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1849408507347107,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2365315616130829,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18298504054546355,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2340652674436569,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022873131558299063,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022873131558299063,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13958741277456282,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.859375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1793098896741867,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017448426457121967,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017448426457121967,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1849408507347107,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2365315616130829,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002311760699376464,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012716376781463623,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83515625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.021388059109449388,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012716377153992654,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012716377153992654,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2714007079601288,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.748046875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34507684111595155,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027140070497989655,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027140070497989655,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2630419824140453,
|
|
"calibration/batch_distribution_entropy": 0.9855633289125907,
|
|
"calibration/batch_entropy_100bins": 0.9657104346168568,
|
|
"calibration/batch_entropy_10bins": 0.9855633289125907,
|
|
"calibration/batch_entropy_50bins": 0.9780025735821166,
|
|
"calibration/batch_uniqueness": 0.9603877337790866,
|
|
"calibration/buffer_distribution_entropy": 0.9981646752348159,
|
|
"calibration/buffer_entropy_100bins": 0.9893824626518759,
|
|
"calibration/buffer_entropy_10bins": 0.9981646752348159,
|
|
"calibration/buffer_entropy_50bins": 0.9944247124897367,
|
|
"calibration/confidence_entropy": 0.49413471824349064,
|
|
"calibration/coverage@0%": 0.011331182729941292,
|
|
"calibration/coverage@1%": 0.0863311827299413,
|
|
"calibration/coverage@10%": 0.2082061827299413,
|
|
"calibration/coverage@15%": 0.24805222602739727,
|
|
"calibration/coverage@20%": 0.36997767857142855,
|
|
"calibration/coverage@25%": 0.49003791585127204,
|
|
"calibration/coverage@30%": 0.6072965080724071,
|
|
"calibration/coverage@5%": 0.1652374327299413,
|
|
"calibration/ece": 0.13711708315742724,
|
|
"calibration/mean_confidence": 0.48495450226497266,
|
|
"calibration/prompt_uniqueness": 0.8678187089457596,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 782.2,
|
|
"completions/max_terminated_length": 617.6,
|
|
"completions/mean_length": 185.22158203125,
|
|
"completions/mean_terminated_length": 185.09028015136718,
|
|
"completions/min_length": 81.4,
|
|
"completions/min_terminated_length": 81.4,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0008502820273861289,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 585106588.0,
|
|
"reward": 0.9321273326873779,
|
|
"reward_std": 0.08039158433675767,
|
|
"rewards/accuracy_reward": 0.53154296875,
|
|
"rewards/brier_reward": 0.8001036405563354,
|
|
"rewards/confidence_uniqueness_reward": 0.9595265865325928,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002705985214561224,
|
|
"rewards/frontier_coverage_0": 0.11961866915225983,
|
|
"rewards/frontier_coverage_1": 0.11961866915225983,
|
|
"rewards/frontier_coverage_10": 0.11961866915225983,
|
|
"rewards/frontier_coverage_15": 0.11961866915225983,
|
|
"rewards/frontier_coverage_20": 0.1191520243883133,
|
|
"rewards/frontier_coverage_25": 0.08837539106607437,
|
|
"rewards/frontier_coverage_5": 0.11961866915225983,
|
|
"rewards/frontier_ece_reward": 0.006058618426322937,
|
|
"rewards/frontier_entropy_batch_reward": -0.20200627744197847,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.082733154296875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11375210285186768,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0413665771484375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0413665771484375,
|
|
"signal/advantage_abs_mean": 0.06228437945246697,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06228437945246697,
|
|
"signal/advantage_pre_scale_std": 0.09934655725955963,
|
|
"signal/advantage_std": 0.09934655725955963,
|
|
"signal/brier_reward/centered_abs_mean": 0.12185298353433609,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84296875,
|
|
"signal/brier_reward/group_std_mean": 0.15725724995136262,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012185298651456834,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012185298651456834,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01168802659958601,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.014986979961395263,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011688026832416653,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011688026832416653,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027294772677123546,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.692578125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004910151939839125,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.411846555536613e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.411846555536613e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16406928300857543,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2110010415315628,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16406928300857543,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2110010415315628,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16406928300857543,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2110010415315628,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16406928300857543,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2110010415315628,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16182146072387696,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2081581711769104,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020227682311087848,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020227682311087848,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1146465077996254,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.871875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.14809595346450805,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014330813428387046,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014330813428387046,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16406928300857543,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2110010415315628,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020508660934865476,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009023293852806091,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012150265648961068,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009023294202052057,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009023294202052057,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2724481761455536,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3477316856384277,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027244817838072775,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027244817838072775,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.309636001445996,
|
|
"calibration/batch_distribution_entropy": 0.9839093996134368,
|
|
"calibration/batch_entropy_100bins": 0.9625499427091299,
|
|
"calibration/batch_entropy_10bins": 0.9839093996134368,
|
|
"calibration/batch_entropy_50bins": 0.972969015575497,
|
|
"calibration/batch_uniqueness": 0.960882568359375,
|
|
"calibration/buffer_distribution_entropy": 0.998281991860735,
|
|
"calibration/buffer_entropy_100bins": 0.9894724328721299,
|
|
"calibration/buffer_entropy_10bins": 0.998281991860735,
|
|
"calibration/buffer_entropy_50bins": 0.9944521850113789,
|
|
"calibration/confidence_entropy": 0.49251360413414175,
|
|
"calibration/coverage@0%": 0.010546875,
|
|
"calibration/coverage@1%": 0.010546875,
|
|
"calibration/coverage@10%": 0.137890625,
|
|
"calibration/coverage@15%": 0.208984375,
|
|
"calibration/coverage@20%": 0.284765625,
|
|
"calibration/coverage@25%": 0.343359375,
|
|
"calibration/coverage@30%": 0.46015625,
|
|
"calibration/coverage@5%": 0.0421875,
|
|
"calibration/ece": 0.08670626321827299,
|
|
"calibration/mean_confidence": 0.489370553214073,
|
|
"calibration/prompt_uniqueness": 0.857275390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 735.4,
|
|
"completions/max_terminated_length": 592.0,
|
|
"completions/mean_length": 181.85361328125,
|
|
"completions/mean_terminated_length": 181.58960876464843,
|
|
"completions/min_length": 81.6,
|
|
"completions/min_terminated_length": 81.6,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.0010482225334271789,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 602155393.0,
|
|
"reward": 0.9271166443824768,
|
|
"reward_std": 0.07998622953891754,
|
|
"rewards/accuracy_reward": 0.52412109375,
|
|
"rewards/brier_reward": 0.7888967275619507,
|
|
"rewards/confidence_uniqueness_reward": 0.9595050811767578,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.003116795467212796,
|
|
"rewards/frontier_coverage_0": 0.1158630833029747,
|
|
"rewards/frontier_coverage_1": 0.1158630833029747,
|
|
"rewards/frontier_coverage_10": 0.1158630833029747,
|
|
"rewards/frontier_coverage_15": 0.1158630833029747,
|
|
"rewards/frontier_coverage_20": 0.11500565633177758,
|
|
"rewards/frontier_coverage_25": 0.08373434320092202,
|
|
"rewards/frontier_coverage_5": 0.1158630833029747,
|
|
"rewards/frontier_ece_reward": 0.005350236594676971,
|
|
"rewards/frontier_entropy_batch_reward": -0.1985933691263199,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.077752685546875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.166796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1094050019979477,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0388763427734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0388763427734375,
|
|
"signal/advantage_abs_mean": 0.06033368557691574,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06033368557691574,
|
|
"signal/advantage_pre_scale_std": 0.09716939330101013,
|
|
"signal/advantage_std": 0.09716939330101013,
|
|
"signal/brier_reward/centered_abs_mean": 0.12069027125835419,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85546875,
|
|
"signal/brier_reward/group_std_mean": 0.1551447778940201,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012069026939570904,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012069026939570904,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01292349398136139,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016793293692171574,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001292349398136139,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001292349398136139,
|
|
"signal/format_reward/centered_abs_mean": 0.000555419921875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0013209730386734009,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028868647757917644,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7015625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005015233065932989,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.608580991567578e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.608580991567578e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16046448349952697,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20543249547481537,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16046448349952697,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20543249547481537,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16046448349952697,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20543249547481537,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16046448349952697,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20543249547481537,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15846198201179504,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20285292565822602,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019807748030871153,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019807748030871153,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10855960100889206,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.13992275893688202,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001356995035894215,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001356995035894215,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16046448349952697,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8828125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20543249547481537,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002005806053057313,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009020330384373665,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012636875361204147,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009020330267958343,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009020330267958343,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2665239542722702,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34042556285858155,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026652396842837333,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026652396842837333,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29726541114295457,
|
|
"calibration/batch_distribution_entropy": 0.9834822954947106,
|
|
"calibration/batch_entropy_100bins": 0.9608169793074197,
|
|
"calibration/batch_entropy_10bins": 0.9834822954947106,
|
|
"calibration/batch_entropy_50bins": 0.9750636920076771,
|
|
"calibration/batch_uniqueness": 0.9607329022889346,
|
|
"calibration/buffer_distribution_entropy": 0.9983789319326808,
|
|
"calibration/buffer_entropy_100bins": 0.9895114798338899,
|
|
"calibration/buffer_entropy_10bins": 0.9983789319326808,
|
|
"calibration/buffer_entropy_50bins": 0.9945117700392121,
|
|
"calibration/confidence_entropy": 0.48444024753987314,
|
|
"calibration/coverage@0%": 0.01800085616438356,
|
|
"calibration/coverage@1%": 0.01800085616438356,
|
|
"calibration/coverage@10%": 0.16278666218199608,
|
|
"calibration/coverage@15%": 0.30757323263209396,
|
|
"calibration/coverage@20%": 0.4346991193737769,
|
|
"calibration/coverage@25%": 0.5273888515166341,
|
|
"calibration/coverage@30%": 0.5982035836594912,
|
|
"calibration/coverage@5%": 0.08451565557729941,
|
|
"calibration/ece": 0.12100732825692273,
|
|
"calibration/mean_confidence": 0.49416566136722384,
|
|
"calibration/prompt_uniqueness": 0.8536409725383713,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1118.0,
|
|
"completions/max_terminated_length": 492.2,
|
|
"completions/mean_length": 176.89560546875,
|
|
"completions/mean_terminated_length": 176.4981475830078,
|
|
"completions/min_length": 79.8,
|
|
"completions/min_terminated_length": 79.8,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0010737936245277524,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 619134516.0,
|
|
"reward": 0.9240444660186767,
|
|
"reward_std": 0.0810801163315773,
|
|
"rewards/accuracy_reward": 0.5212890625,
|
|
"rewards/brier_reward": 0.790893018245697,
|
|
"rewards/confidence_uniqueness_reward": 0.9601579666137695,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.00303261773660779,
|
|
"rewards/frontier_coverage_0": 0.12167765200138092,
|
|
"rewards/frontier_coverage_1": 0.12167765200138092,
|
|
"rewards/frontier_coverage_10": 0.12167765200138092,
|
|
"rewards/frontier_coverage_15": 0.12167765200138092,
|
|
"rewards/frontier_coverage_20": 0.1199147269129753,
|
|
"rewards/frontier_coverage_25": 0.08222576975822449,
|
|
"rewards/frontier_coverage_5": 0.12167765200138092,
|
|
"rewards/frontier_ece_reward": 0.0055978668853640555,
|
|
"rewards/frontier_entropy_batch_reward": -0.22212174534797668,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08709716796875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11671981066465378,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.043548583984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.043548583984375,
|
|
"signal/advantage_abs_mean": 0.0631883479654789,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0631883479654789,
|
|
"signal/advantage_pre_scale_std": 0.09987544417381286,
|
|
"signal/advantage_std": 0.09987544417381286,
|
|
"signal/brier_reward/centered_abs_mean": 0.11597198843955994,
|
|
"signal/brier_reward/group_bin_occupancy": 0.838671875,
|
|
"signal/brier_reward/group_std_mean": 0.149802365899086,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011597198992967605,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011597198992967605,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012930301018059254,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.911328125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017115654610097408,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012930301018059254,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012930301018059254,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002846223535016179,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.709765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004566754633560777,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.557779564289376e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.557779564289376e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1627124637365341,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.20693700313568114,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1627124637365341,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20693700313568114,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1627124637365341,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20693700313568114,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1627124637365341,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20693700313568114,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15905381739139557,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20235534608364106,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001988172740675509,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001988172740675509,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.10259814411401749,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.857421875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1315797194838524,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012824768433347344,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012824768433347344,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1627124637365341,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20693700313568114,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020339058246463537,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009119224734604359,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012649010121822356,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009119224967435002,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009119224967435002,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2906370997428894,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.723828125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3651686549186707,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029063709452748297,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029063709452748297,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23078012934450687,
|
|
"calibration/batch_distribution_entropy": 0.972169633516365,
|
|
"calibration/batch_entropy_100bins": 0.9577764655227113,
|
|
"calibration/batch_entropy_10bins": 0.972169633516365,
|
|
"calibration/batch_entropy_50bins": 0.9698115325872496,
|
|
"calibration/batch_uniqueness": 0.958111572265625,
|
|
"calibration/buffer_distribution_entropy": 0.9983615562842496,
|
|
"calibration/buffer_entropy_100bins": 0.989554537741407,
|
|
"calibration/buffer_entropy_10bins": 0.9983615562842496,
|
|
"calibration/buffer_entropy_50bins": 0.9944703301690406,
|
|
"calibration/confidence_entropy": 0.4702111359575749,
|
|
"calibration/coverage@0%": 0.074609375,
|
|
"calibration/coverage@1%": 0.0796875,
|
|
"calibration/coverage@10%": 0.249609375,
|
|
"calibration/coverage@15%": 0.390234375,
|
|
"calibration/coverage@20%": 0.530078125,
|
|
"calibration/coverage@25%": 0.60859375,
|
|
"calibration/coverage@30%": 0.70234375,
|
|
"calibration/coverage@5%": 0.15078125,
|
|
"calibration/ece": 0.11253724402326422,
|
|
"calibration/mean_confidence": 0.4741178147270788,
|
|
"calibration/prompt_uniqueness": 0.855712890625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 956.2,
|
|
"completions/max_terminated_length": 574.2,
|
|
"completions/mean_length": 183.07431640625,
|
|
"completions/mean_terminated_length": 182.80919494628907,
|
|
"completions/min_length": 82.8,
|
|
"completions/min_terminated_length": 82.8,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0008162545855157077,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 636008685.0,
|
|
"reward": 0.932918655872345,
|
|
"reward_std": 0.07714778482913971,
|
|
"rewards/accuracy_reward": 0.5296875,
|
|
"rewards/brier_reward": 0.809378182888031,
|
|
"rewards/confidence_uniqueness_reward": 0.959358549118042,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002335884002968669,
|
|
"rewards/frontier_coverage_0": 0.13527322113513945,
|
|
"rewards/frontier_coverage_1": 0.13527322113513945,
|
|
"rewards/frontier_coverage_10": 0.13527322113513945,
|
|
"rewards/frontier_coverage_15": 0.13527322113513945,
|
|
"rewards/frontier_coverage_20": 0.12897036075592042,
|
|
"rewards/frontier_coverage_25": 0.08629466593265533,
|
|
"rewards/frontier_coverage_5": 0.13527322113513945,
|
|
"rewards/frontier_ece_reward": 0.006201074831187725,
|
|
"rewards/frontier_entropy_batch_reward": -0.20437394380569457,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08509521484375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.166796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1144148737192154,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042547607421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042547607421875,
|
|
"signal/advantage_abs_mean": 0.05944623276591301,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05944623276591301,
|
|
"signal/advantage_pre_scale_std": 0.09432210624217988,
|
|
"signal/advantage_std": 0.09432210624217988,
|
|
"signal/brier_reward/centered_abs_mean": 0.11613385826349258,
|
|
"signal/brier_reward/group_bin_occupancy": 0.843359375,
|
|
"signal/brier_reward/group_std_mean": 0.14919577836990355,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011613386496901513,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011613386496901513,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012426980212330819,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9140625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0162442235276103,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012426980305463077,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012426980305463077,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002181270159780979,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003462765412405133,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.726587808865588e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.726587808865588e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17015551626682282,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2166207551956177,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17015551626682282,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2166207551956177,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17015551626682282,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2166207551956177,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17015551626682282,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2166207551956177,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16215289533138275,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8578125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20657850205898284,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020269112894311546,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020269112894311546,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09889246076345444,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.878515625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1259763240814209,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012361557688564061,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012361557688564061,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17015551626682282,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86015625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2166207551956177,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002126943925395608,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008855049218982457,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.825,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012453357130289078,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008855049381963909,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008855049381963909,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2709381639957428,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7328125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3473371982574463,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02709381692111492,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02709381692111492,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24190303576000538,
|
|
"calibration/batch_distribution_entropy": 0.9851080109571406,
|
|
"calibration/batch_entropy_100bins": 0.9660194285586039,
|
|
"calibration/batch_entropy_10bins": 0.9851080109571406,
|
|
"calibration/batch_entropy_50bins": 0.9779150487479604,
|
|
"calibration/batch_uniqueness": 0.9610351488052915,
|
|
"calibration/buffer_distribution_entropy": 0.9984440017454219,
|
|
"calibration/buffer_entropy_100bins": 0.98965536789939,
|
|
"calibration/buffer_entropy_10bins": 0.9984440017454219,
|
|
"calibration/buffer_entropy_50bins": 0.9944964147279853,
|
|
"calibration/confidence_entropy": 0.4965098505390208,
|
|
"calibration/coverage@0%": 0.01917196673189824,
|
|
"calibration/coverage@1%": 0.01917196673189824,
|
|
"calibration/coverage@10%": 0.12628424657534248,
|
|
"calibration/coverage@15%": 0.30368685787671235,
|
|
"calibration/coverage@20%": 0.44866530088062617,
|
|
"calibration/coverage@25%": 0.5823018590998043,
|
|
"calibration/coverage@30%": 0.6850691046966733,
|
|
"calibration/coverage@5%": 0.048483365949119374,
|
|
"calibration/ece": 0.08340996381299517,
|
|
"calibration/mean_confidence": 0.516178190966678,
|
|
"calibration/prompt_uniqueness": 0.8672639686036681,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 779.6,
|
|
"completions/max_terminated_length": 570.2,
|
|
"completions/mean_length": 184.97431640625,
|
|
"completions/mean_terminated_length": 184.84255981445312,
|
|
"completions/min_length": 85.0,
|
|
"completions/min_terminated_length": 85.0,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.0011064645368605852,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 653246726.0,
|
|
"reward": 0.9357229709625244,
|
|
"reward_std": 0.08368170112371445,
|
|
"rewards/accuracy_reward": 0.5328125,
|
|
"rewards/brier_reward": 0.8055280208587646,
|
|
"rewards/confidence_uniqueness_reward": 0.9595144271850586,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0028843384236097334,
|
|
"rewards/frontier_coverage_0": 0.12474274337291717,
|
|
"rewards/frontier_coverage_1": 0.12474274337291717,
|
|
"rewards/frontier_coverage_10": 0.12474274337291717,
|
|
"rewards/frontier_coverage_15": 0.12449503540992737,
|
|
"rewards/frontier_coverage_20": 0.11553706079721451,
|
|
"rewards/frontier_coverage_25": 0.07678574174642563,
|
|
"rewards/frontier_coverage_5": 0.12474274337291717,
|
|
"rewards/frontier_ece_reward": 0.005106198182329535,
|
|
"rewards/frontier_entropy_batch_reward": -0.1781061351299286,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09542236328125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.127110655605793,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047711181640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.047711181640625,
|
|
"signal/advantage_abs_mean": 0.06504265516996384,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06504265516996384,
|
|
"signal/advantage_pre_scale_std": 0.10494562834501267,
|
|
"signal/advantage_std": 0.10494562834501267,
|
|
"signal/brier_reward/centered_abs_mean": 0.11675633937120437,
|
|
"signal/brier_reward/group_bin_occupancy": 0.855859375,
|
|
"signal/brier_reward/group_std_mean": 0.1501062899827957,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011675634235143662,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011675634235143662,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011868251860141754,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.928125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015202015824615955,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011868252186104655,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011868252186104655,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027540235314518213,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70546875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004497009515762329,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.442529414314777e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.442529414314777e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15991105139255524,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2048025608062744,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019988882122561336,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019988882122561336,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15991105139255524,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2048025608062744,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019988882122561336,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019988882122561336,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15991105139255524,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2048025608062744,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019988882122561336,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019988882122561336,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15969133675098418,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.881640625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20454807877540587,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001996141788549721,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001996141788549721,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1426139533519745,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.876171875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18301699459552764,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00178267452865839,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00178267452865839,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08272561132907867,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.899609375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10734816044569015,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001034070155583322,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001034070155583322,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15991105139255524,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2048025608062744,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019988882122561336,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019988882122561336,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00812565665692091,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83671875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011584336683154107,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008125656750053167,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008125656750053167,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2498374253511429,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3267929255962372,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024983742833137514,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024983742833137514,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26512359032820576,
|
|
"calibration/batch_distribution_entropy": 0.9819760604956709,
|
|
"calibration/batch_entropy_100bins": 0.961525383497633,
|
|
"calibration/batch_entropy_10bins": 0.9819760604956709,
|
|
"calibration/batch_entropy_50bins": 0.9735273980284143,
|
|
"calibration/batch_uniqueness": 0.9592022574135403,
|
|
"calibration/buffer_distribution_entropy": 0.9985324674567476,
|
|
"calibration/buffer_entropy_100bins": 0.9898235209789318,
|
|
"calibration/buffer_entropy_10bins": 0.9985324674567476,
|
|
"calibration/buffer_entropy_50bins": 0.9945705588322357,
|
|
"calibration/confidence_entropy": 0.5043373180268479,
|
|
"calibration/coverage@0%": 0.06848550636007827,
|
|
"calibration/coverage@1%": 0.07318217954990215,
|
|
"calibration/coverage@10%": 0.3003011863992172,
|
|
"calibration/coverage@15%": 0.35930925880626224,
|
|
"calibration/coverage@20%": 0.3980078889432485,
|
|
"calibration/coverage@25%": 0.4449081152152642,
|
|
"calibration/coverage@30%": 0.6000076443248532,
|
|
"calibration/coverage@5%": 0.23814135885518595,
|
|
"calibration/ece": 0.16721492077336086,
|
|
"calibration/mean_confidence": 0.5371362845790759,
|
|
"calibration/prompt_uniqueness": 0.8633109228668054,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0009765625,
|
|
"completions/max_length": 1131.8,
|
|
"completions/max_terminated_length": 729.0,
|
|
"completions/mean_length": 188.35107421875,
|
|
"completions/mean_terminated_length": 187.03569641113282,
|
|
"completions/min_length": 82.4,
|
|
"completions/min_terminated_length": 82.4,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0009473967947997153,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.002,
|
|
"num_tokens": 670518129.0,
|
|
"reward": 0.9468509316444397,
|
|
"reward_std": 0.07844078540802002,
|
|
"rewards/accuracy_reward": 0.5681640625,
|
|
"rewards/brier_reward": 0.8030801296234131,
|
|
"rewards/confidence_uniqueness_reward": 0.957956874370575,
|
|
"rewards/format_reward": 0.99892578125,
|
|
"rewards/frontier_aurc_reward": -0.0027081962209194897,
|
|
"rewards/frontier_coverage_0": 0.09533136114478111,
|
|
"rewards/frontier_coverage_1": 0.09533136114478111,
|
|
"rewards/frontier_coverage_10": 0.09533136114478111,
|
|
"rewards/frontier_coverage_15": 0.0953597754240036,
|
|
"rewards/frontier_coverage_20": 0.08704339265823365,
|
|
"rewards/frontier_coverage_25": 0.05847667083144188,
|
|
"rewards/frontier_coverage_5": 0.09533136114478111,
|
|
"rewards/frontier_ece_reward": 0.005162352602928877,
|
|
"rewards/frontier_entropy_batch_reward": -0.21057653427124023,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07152099609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.162109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.09872582405805588,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.035760498046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035760498046875,
|
|
"signal/advantage_abs_mean": 0.0600838340818882,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0600838340818882,
|
|
"signal/advantage_pre_scale_std": 0.09821470826864243,
|
|
"signal/advantage_std": 0.09821470826864243,
|
|
"signal/brier_reward/centered_abs_mean": 0.11101796627044677,
|
|
"signal/brier_reward/group_bin_occupancy": 0.848828125,
|
|
"signal/brier_reward/group_std_mean": 0.14420543015003204,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011101796850562095,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011101796850562095,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013335288688540458,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.93515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01753148380666971,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013335288735106588,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013335288735106588,
|
|
"signal/format_reward/centered_abs_mean": 0.001739501953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.1265625,
|
|
"signal/format_reward/group_std_mean": 0.0030320982448756697,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008697509765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008697509765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027640830259770153,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.705859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004512441391125321,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.455103724263609e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.455103724263609e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.139675572514534,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18167279958724974,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017459447728469968,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017459447728469968,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.139675572514534,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18167279958724974,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017459447728469968,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017459447728469968,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.139675572514534,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18167279958724974,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017459447728469968,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017459447728469968,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1394558221101761,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.875390625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18138521909713745,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017431978834792972,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017431978834792972,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1208455815911293,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15725071132183074,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015105698024854065,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015105698024854065,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07018533274531365,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.8984375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09095044732093811,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008773167035542428,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008773167035542428,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.139675572514534,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18167279958724974,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017459447728469968,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017459447728469968,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008315538614988327,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.822265625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01194094903767109,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008315538754686714,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008315538754686714,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27706546187400816,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72578125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35140617489814757,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02770654745399952,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02770654745399952,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.4566159075334175,
|
|
"eval_calibration/batch_distribution_entropy": 0.9163126557855614,
|
|
"eval_calibration/batch_entropy_100bins": 0.7178744913412122,
|
|
"eval_calibration/batch_entropy_10bins": 0.9163126557855614,
|
|
"eval_calibration/batch_entropy_50bins": 0.798684377132812,
|
|
"eval_calibration/batch_uniqueness": 0.904296875,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9985671709970653,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9900184255261969,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9985671709970653,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9946421013154033,
|
|
"eval_calibration/confidence_entropy": 0.5005098902291641,
|
|
"eval_calibration/coverage@0%": 0.0390625,
|
|
"eval_calibration/coverage@1%": 0.0390625,
|
|
"eval_calibration/coverage@10%": 0.0390625,
|
|
"eval_calibration/coverage@15%": 0.09375,
|
|
"eval_calibration/coverage@20%": 0.1875,
|
|
"eval_calibration/coverage@25%": 0.2109375,
|
|
"eval_calibration/coverage@30%": 0.25,
|
|
"eval_calibration/coverage@5%": 0.0390625,
|
|
"eval_calibration/ece": 0.19819265669162003,
|
|
"eval_calibration/mean_confidence": 0.44646902260571164,
|
|
"eval_calibration/prompt_uniqueness": 0.904296875,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 373.25,
|
|
"eval_completions/max_terminated_length": 373.25,
|
|
"eval_completions/mean_length": 191.53293228149414,
|
|
"eval_completions/mean_terminated_length": 191.53293228149414,
|
|
"eval_completions/min_length": 95.5,
|
|
"eval_completions/min_terminated_length": 95.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 670518129.0,
|
|
"eval_reward": 0.799683153629303,
|
|
"eval_reward_std": 0.22493423148989677,
|
|
"eval_rewards/accuracy_reward": 0.4296875,
|
|
"eval_rewards/brier_reward": 0.7988216429948807,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.89794921875,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0034247017465531826,
|
|
"eval_rewards/frontier_coverage_0": 0.18689577654004097,
|
|
"eval_rewards/frontier_coverage_1": 0.18689577654004097,
|
|
"eval_rewards/frontier_coverage_10": 0.18689577654004097,
|
|
"eval_rewards/frontier_coverage_15": 0.18632838502526283,
|
|
"eval_rewards/frontier_coverage_20": 0.1586691550910473,
|
|
"eval_rewards/frontier_coverage_25": 0.08706778101623058,
|
|
"eval_rewards/frontier_coverage_5": 0.18689577654004097,
|
|
"eval_rewards/frontier_ece_reward": 0.004595339996740222,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 19.8251,
|
|
"eval_samples_per_second": 25.22,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.47509765625,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49481892585754395,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.237548828125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.237548828125,
|
|
"eval_signal/advantage_abs_mean": 0.21163957193493843,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21163957193493843,
|
|
"eval_signal/advantage_pre_scale_std": 0.2224200740456581,
|
|
"eval_signal/advantage_std": 0.2224200740456581,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.18079102784395218,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8828125,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2304544784128666,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018079102504998446,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018079102504998446,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0389862060546875,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.40625,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.046543585136532784,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003898620721884072,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003898620721884072,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004244803451001644,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6015625,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008210767526179552,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.306004641170148e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.306004641170148e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.36475419253110886,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.4384455382823944,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004559427383355796,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004559427383355796,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36475419253110886,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4384455382823944,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004559427383355796,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004559427383355796,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.36475419253110886,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4384455382823944,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004559427383355796,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004559427383355796,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3635733351111412,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.43705061078071594,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004544666619040072,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004544666619040072,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3101271614432335,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.984375,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.37565645575523376,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038765897625125945,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038765897625125945,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.15158939361572266,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.19350523501634598,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018948675133287907,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018948675133287907,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36475419253110886,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4384455382823944,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004559427383355796,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004559427383355796,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.00890616630204022,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.890625,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.013190251076593995,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008906166476663202,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008906166476663202,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.202,
|
|
"step": 200
|
|
},
|
|
{
|
|
"calibration/aurc": 0.41371530585067084,
|
|
"calibration/batch_distribution_entropy": 0.968526477144453,
|
|
"calibration/batch_entropy_100bins": 0.9606235974216354,
|
|
"calibration/batch_entropy_10bins": 0.968526477144453,
|
|
"calibration/batch_entropy_50bins": 0.9681260324091415,
|
|
"calibration/batch_uniqueness": 0.9544647216796875,
|
|
"calibration/buffer_distribution_entropy": 0.9986671560208302,
|
|
"calibration/buffer_entropy_100bins": 0.9902706960336737,
|
|
"calibration/buffer_entropy_10bins": 0.9986671560208302,
|
|
"calibration/buffer_entropy_50bins": 0.9947728709798141,
|
|
"calibration/confidence_entropy": 0.5252373594473655,
|
|
"calibration/coverage@0%": 0.00546875,
|
|
"calibration/coverage@1%": 0.00546875,
|
|
"calibration/coverage@10%": 0.0140625,
|
|
"calibration/coverage@15%": 0.06171875,
|
|
"calibration/coverage@20%": 0.1109375,
|
|
"calibration/coverage@25%": 0.179296875,
|
|
"calibration/coverage@30%": 0.28984375,
|
|
"calibration/coverage@5%": 0.00546875,
|
|
"calibration/ece": 0.10538980298983983,
|
|
"calibration/mean_confidence": 0.4486731517153184,
|
|
"calibration/prompt_uniqueness": 0.8537109375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 558.0,
|
|
"completions/max_terminated_length": 558.0,
|
|
"completions/mean_length": 188.62841796875,
|
|
"completions/mean_terminated_length": 188.62841796875,
|
|
"completions/min_length": 86.2,
|
|
"completions/min_terminated_length": 86.2,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0009614454465918243,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 687306228.0,
|
|
"reward": 0.9128621459007263,
|
|
"reward_std": 0.08489621281623841,
|
|
"rewards/accuracy_reward": 0.49951171875,
|
|
"rewards/brier_reward": 0.7791517615318299,
|
|
"rewards/confidence_uniqueness_reward": 0.9559079051017761,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.00292632021009922,
|
|
"rewards/frontier_coverage_0": 0.11452654302120209,
|
|
"rewards/frontier_coverage_1": 0.11452654302120209,
|
|
"rewards/frontier_coverage_10": 0.11452654302120209,
|
|
"rewards/frontier_coverage_15": 0.11425123661756516,
|
|
"rewards/frontier_coverage_20": 0.0982695385813713,
|
|
"rewards/frontier_coverage_25": 0.05823923796415329,
|
|
"rewards/frontier_coverage_5": 0.11452654302120209,
|
|
"rewards/frontier_ece_reward": 0.0032087708823382854,
|
|
"rewards/frontier_entropy_batch_reward": -0.1974597692489624,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.094403076171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.12853155434131622,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0472015380859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0472015380859375,
|
|
"signal/advantage_abs_mean": 0.0651530534029007,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0651530534029007,
|
|
"signal/advantage_pre_scale_std": 0.1036272794008255,
|
|
"signal/advantage_std": 0.1036272794008255,
|
|
"signal/brier_reward/centered_abs_mean": 0.12340695858001709,
|
|
"signal/brier_reward/group_bin_occupancy": 0.872265625,
|
|
"signal/brier_reward/group_std_mean": 0.15771982073783875,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012340695783495902,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012340695783495902,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012716875597834586,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016173630580306055,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012716875644400716,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012716875644400716,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025363420136272907,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7109375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004503958486020565,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.170427517034114e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.170427517034114e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1702498823404312,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2185587167739868,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021281236317008735,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021281236317008735,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1702498823404312,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2185587167739868,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021281236317008735,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021281236317008735,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1702498823404312,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2185587167739868,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021281236317008735,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021281236317008735,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1695919394493103,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.88671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21770275235176087,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021198994014412164,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021198994014412164,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1464843899011612,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18815037310123445,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018310548504814506,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018310548504814506,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0794641137123108,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10283097177743912,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009933014633134007,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009933014633134007,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1702498823404312,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.887109375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2185587167739868,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021281236317008735,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021281236317008735,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006669469363987446,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.82109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009653137251734733,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006669469643384218,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006669469643384218,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26615132987499235,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.739453125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3430874884128571,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.026615133881568907,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026615133881568907,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2996998322733083,
|
|
"calibration/batch_distribution_entropy": 0.9712911308832048,
|
|
"calibration/batch_entropy_100bins": 0.9581911832477289,
|
|
"calibration/batch_entropy_10bins": 0.9712911308832048,
|
|
"calibration/batch_entropy_50bins": 0.9694983488466955,
|
|
"calibration/batch_uniqueness": 0.9568363156374307,
|
|
"calibration/buffer_distribution_entropy": 0.9989196857105181,
|
|
"calibration/buffer_entropy_100bins": 0.9907594188460136,
|
|
"calibration/buffer_entropy_10bins": 0.9989196857105181,
|
|
"calibration/buffer_entropy_50bins": 0.9950373549425547,
|
|
"calibration/confidence_entropy": 0.49656147261144473,
|
|
"calibration/coverage@0%": 0.01328125,
|
|
"calibration/coverage@1%": 0.01328125,
|
|
"calibration/coverage@10%": 0.12422639432485323,
|
|
"calibration/coverage@15%": 0.16836701932485323,
|
|
"calibration/coverage@20%": 0.2230545193248532,
|
|
"calibration/coverage@25%": 0.3297280149217221,
|
|
"calibration/coverage@30%": 0.4953736545988258,
|
|
"calibration/coverage@5%": 0.031640625,
|
|
"calibration/ece": 0.13407407064959234,
|
|
"calibration/mean_confidence": 0.4797791452804888,
|
|
"calibration/prompt_uniqueness": 0.8583812638202394,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 828.0,
|
|
"completions/max_terminated_length": 688.2,
|
|
"completions/mean_length": 188.770703125,
|
|
"completions/mean_terminated_length": 188.6397918701172,
|
|
"completions/min_length": 87.4,
|
|
"completions/min_terminated_length": 87.4,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.0009502097382210195,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 704152680.0,
|
|
"reward": 0.9265612006187439,
|
|
"reward_std": 0.07872170060873032,
|
|
"rewards/accuracy_reward": 0.5279296875,
|
|
"rewards/brier_reward": 0.7858775019645691,
|
|
"rewards/confidence_uniqueness_reward": 0.9564463257789612,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.00274044550023973,
|
|
"rewards/frontier_coverage_0": 0.11836416125297547,
|
|
"rewards/frontier_coverage_1": 0.11836416125297547,
|
|
"rewards/frontier_coverage_10": 0.11836416125297547,
|
|
"rewards/frontier_coverage_15": 0.11800117641687394,
|
|
"rewards/frontier_coverage_20": 0.10758722573518753,
|
|
"rewards/frontier_coverage_25": 0.0648583009839058,
|
|
"rewards/frontier_coverage_5": 0.11836416125297547,
|
|
"rewards/frontier_ece_reward": 0.003610279364511371,
|
|
"rewards/frontier_entropy_batch_reward": -0.21462770104408263,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08948974609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.167578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11885513663291931,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044744873046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044744873046875,
|
|
"signal/advantage_abs_mean": 0.060820522159337996,
|
|
"signal/advantage_pre_scale_abs_mean": 0.060820522159337996,
|
|
"signal/advantage_pre_scale_std": 0.09661759734153748,
|
|
"signal/advantage_std": 0.09661759734153748,
|
|
"signal/brier_reward/centered_abs_mean": 0.1223609670996666,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8421875,
|
|
"signal/brier_reward/group_std_mean": 0.15609249770641326,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012236096523702144,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012236096523702144,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013496090844273567,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.926953125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01729346551001072,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013496090890839697,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013496090890839697,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024223918560892345,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.705859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004061613464727998,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.027989914698992e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.027989914698992e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17780146598815919,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2251005709171295,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002222518343478441,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002222518343478441,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17780146598815919,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2251005709171295,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002222518343478441,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002222518343478441,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17780146598815919,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2251005709171295,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002222518343478441,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002222518343478441,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17655244171619416,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2235410749912262,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022069055587053297,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022069055587053297,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15346194803714752,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.85859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19473823606967927,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019182743271812797,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019182743271812797,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08100719451904297,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.896875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10340845137834549,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010125899803824722,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010125899803824722,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17780146598815919,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2251005709171295,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002222518343478441,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002222518343478441,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007257478311657906,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010310792177915574,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007257478660903871,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007257478660903871,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2733268320560455,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.721484375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34998972415924073,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027332685142755508,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027332685142755508,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.334584462013137,
|
|
"calibration/batch_distribution_entropy": 0.981975898164209,
|
|
"calibration/batch_entropy_100bins": 0.9626993539936158,
|
|
"calibration/batch_entropy_10bins": 0.981975898164209,
|
|
"calibration/batch_entropy_50bins": 0.9758907463855657,
|
|
"calibration/batch_uniqueness": 0.9594284057909366,
|
|
"calibration/buffer_distribution_entropy": 0.9989722834263842,
|
|
"calibration/buffer_entropy_100bins": 0.9911441184383936,
|
|
"calibration/buffer_entropy_10bins": 0.9989722834263842,
|
|
"calibration/buffer_entropy_50bins": 0.9952248695727167,
|
|
"calibration/confidence_entropy": 0.49724886655592815,
|
|
"calibration/coverage@0%": 0.0042976394324853225,
|
|
"calibration/coverage@1%": 0.0042976394324853225,
|
|
"calibration/coverage@10%": 0.041407014432485324,
|
|
"calibration/coverage@15%": 0.13792196673189822,
|
|
"calibration/coverage@20%": 0.3537006176614481,
|
|
"calibration/coverage@25%": 0.41976363747553813,
|
|
"calibration/coverage@30%": 0.6041638637475538,
|
|
"calibration/coverage@5%": 0.019532014432485322,
|
|
"calibration/ece": 0.13218861654477354,
|
|
"calibration/mean_confidence": 0.5038023206802205,
|
|
"calibration/prompt_uniqueness": 0.863943656103668,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 1064.4,
|
|
"completions/max_terminated_length": 782.0,
|
|
"completions/mean_length": 191.0904296875,
|
|
"completions/mean_terminated_length": 190.82761840820314,
|
|
"completions/min_length": 85.8,
|
|
"completions/min_terminated_length": 85.8,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.0010152794420719147,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 721063366.0,
|
|
"reward": 0.9351613402366639,
|
|
"reward_std": 0.08508041054010392,
|
|
"rewards/accuracy_reward": 0.54443359375,
|
|
"rewards/brier_reward": 0.7874362349510193,
|
|
"rewards/confidence_uniqueness_reward": 0.9589925885200501,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.003119149315170944,
|
|
"rewards/frontier_coverage_0": 0.10003266781568527,
|
|
"rewards/frontier_coverage_1": 0.10003266781568527,
|
|
"rewards/frontier_coverage_10": 0.10003266781568527,
|
|
"rewards/frontier_coverage_15": 0.09940593391656875,
|
|
"rewards/frontier_coverage_20": 0.0850291058421135,
|
|
"rewards/frontier_coverage_25": 0.048729277402162555,
|
|
"rewards/frontier_coverage_5": 0.10003266781568527,
|
|
"rewards/frontier_ece_reward": 0.004082085704430938,
|
|
"rewards/frontier_entropy_batch_reward": -0.198372682929039,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.099102783203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.13263332694768906,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0495513916015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0495513916015625,
|
|
"signal/advantage_abs_mean": 0.06450984179973603,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06450984179973603,
|
|
"signal/advantage_pre_scale_std": 0.10448751300573349,
|
|
"signal/advantage_std": 0.10448751300573349,
|
|
"signal/brier_reward/centered_abs_mean": 0.12233641296625138,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85078125,
|
|
"signal/brier_reward/group_std_mean": 0.15672328174114228,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012233641929924488,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012233641929924488,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012578487582504749,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9265625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016628415510058402,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012578487861901523,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012578487861901523,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002936544781550765,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7078125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004950050543993711,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.670681326184422e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.670681326184422e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.16857316195964814,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21436219811439514,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021071645431220533,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021071645431220533,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16857316195964814,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21436219811439514,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021071645431220533,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021071645431220533,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16857316195964814,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21436219811439514,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021071645431220533,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021071645431220533,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16726841926574706,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21273342669010162,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00209085529204458,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00209085529204458,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14143361896276474,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.851953125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1803019016981125,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017679202603176237,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017679202603176237,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.07234455198049546,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.90390625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09286017566919327,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009043069556355476,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009043069556355476,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16857316195964814,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21436219811439514,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021071645431220533,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021071645431220533,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007802222948521375,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.825390625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011409426480531693,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007802223321050405,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007802223321050405,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2717812657356262,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730859375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34891357421875,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02717812769114971,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02717812769114971,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2431222558267499,
|
|
"calibration/batch_distribution_entropy": 0.9704915481336872,
|
|
"calibration/batch_entropy_100bins": 0.95218267752457,
|
|
"calibration/batch_entropy_10bins": 0.9704915481336872,
|
|
"calibration/batch_entropy_50bins": 0.9658659766924742,
|
|
"calibration/batch_uniqueness": 0.9607759885101235,
|
|
"calibration/buffer_distribution_entropy": 0.9988664602008029,
|
|
"calibration/buffer_entropy_100bins": 0.9911952395475305,
|
|
"calibration/buffer_entropy_10bins": 0.9988664602008029,
|
|
"calibration/buffer_entropy_50bins": 0.9952006753696396,
|
|
"calibration/confidence_entropy": 0.4901460074658397,
|
|
"calibration/coverage@0%": 0.016022504892367905,
|
|
"calibration/coverage@1%": 0.016022504892367905,
|
|
"calibration/coverage@10%": 0.1179756298923679,
|
|
"calibration/coverage@15%": 0.22891848091976516,
|
|
"calibration/coverage@20%": 0.37267841854207434,
|
|
"calibration/coverage@25%": 0.5801140533268102,
|
|
"calibration/coverage@30%": 0.6719300391389432,
|
|
"calibration/coverage@5%": 0.048053754892367906,
|
|
"calibration/ece": 0.11020522398245616,
|
|
"calibration/mean_confidence": 0.5436369682894794,
|
|
"calibration/prompt_uniqueness": 0.8631135779786681,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 753.2,
|
|
"completions/max_terminated_length": 586.6,
|
|
"completions/mean_length": 189.27822265625,
|
|
"completions/mean_terminated_length": 189.1458312988281,
|
|
"completions/min_length": 83.6,
|
|
"completions/min_terminated_length": 83.6,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.0008683862979523838,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 737867719.0,
|
|
"reward": 0.9372278213500976,
|
|
"reward_std": 0.08068549633026123,
|
|
"rewards/accuracy_reward": 0.54736328125,
|
|
"rewards/brier_reward": 0.7968594074249268,
|
|
"rewards/confidence_uniqueness_reward": 0.9608587741851806,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.003032087814062834,
|
|
"rewards/frontier_coverage_0": 0.09889980629086495,
|
|
"rewards/frontier_coverage_1": 0.09889980629086495,
|
|
"rewards/frontier_coverage_10": 0.09889980629086495,
|
|
"rewards/frontier_coverage_15": 0.09856819957494736,
|
|
"rewards/frontier_coverage_20": 0.08530885577201844,
|
|
"rewards/frontier_coverage_25": 0.051339687407016756,
|
|
"rewards/frontier_coverage_5": 0.09889980629086495,
|
|
"rewards/frontier_ece_reward": 0.004148419946432114,
|
|
"rewards/frontier_entropy_batch_reward": -0.20438967049121856,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083721923828125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11213247925043106,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0418609619140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0418609619140625,
|
|
"signal/advantage_abs_mean": 0.06281042322516442,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06281042322516442,
|
|
"signal/advantage_pre_scale_std": 0.10061680972576141,
|
|
"signal/advantage_std": 0.10061680972576141,
|
|
"signal/brier_reward/centered_abs_mean": 0.11816587895154954,
|
|
"signal/brier_reward/group_bin_occupancy": 0.853125,
|
|
"signal/brier_reward/group_std_mean": 0.1513270229101181,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01181658823043108,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01181658823043108,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011747047305107117,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9234375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015102297998964787,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001174704753793776,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001174704753793776,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002884101867675781,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71328125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004772100504487753,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6051273491466417e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6051273491466417e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15133111774921418,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1936686307191849,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018916390370577573,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018916390370577573,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15133111774921418,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1936686307191849,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018916390370577573,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018916390370577573,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15133111774921418,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1936686307191849,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018916390370577573,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018916390370577573,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15005215704441072,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.864453125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19203002452850343,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018756520003080368,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018756520003080368,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1224544808268547,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.85546875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.15708767175674437,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015306809917092323,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015306809917092323,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06395273804664611,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.912890625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08253951072692871,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007994092302396894,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007994092302396894,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15133111774921418,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1936686307191849,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018916390370577573,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018916390370577573,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007144089136272669,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.826171875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010504491440951825,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007144089206121862,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007144089206121862,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.265256404876709,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.339794796705246,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02652563974261284,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02652563974261284,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23301790118207893,
|
|
"calibration/batch_distribution_entropy": 0.9875498553783582,
|
|
"calibration/batch_entropy_100bins": 0.9644342655540867,
|
|
"calibration/batch_entropy_10bins": 0.9875498553783582,
|
|
"calibration/batch_entropy_50bins": 0.9791999063823041,
|
|
"calibration/batch_uniqueness": 0.9621734619140625,
|
|
"calibration/buffer_distribution_entropy": 0.998861625242886,
|
|
"calibration/buffer_entropy_100bins": 0.9913135794668897,
|
|
"calibration/buffer_entropy_10bins": 0.998861625242886,
|
|
"calibration/buffer_entropy_50bins": 0.9952531135075292,
|
|
"calibration/confidence_entropy": 0.5181585521770237,
|
|
"calibration/coverage@0%": 0.0671875,
|
|
"calibration/coverage@1%": 0.08359375,
|
|
"calibration/coverage@10%": 0.22265625,
|
|
"calibration/coverage@15%": 0.281640625,
|
|
"calibration/coverage@20%": 0.43515625,
|
|
"calibration/coverage@25%": 0.585546875,
|
|
"calibration/coverage@30%": 0.692578125,
|
|
"calibration/coverage@5%": 0.160546875,
|
|
"calibration/ece": 0.131002530713914,
|
|
"calibration/mean_confidence": 0.5249371858462698,
|
|
"calibration/prompt_uniqueness": 0.870068359375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 731.4,
|
|
"completions/max_terminated_length": 731.4,
|
|
"completions/mean_length": 191.42177734375,
|
|
"completions/mean_terminated_length": 191.42177734375,
|
|
"completions/min_length": 82.6,
|
|
"completions/min_terminated_length": 82.6,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0009743034606799483,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 754837734.0,
|
|
"reward": 0.9488389015197753,
|
|
"reward_std": 0.08193524926900864,
|
|
"rewards/accuracy_reward": 0.57021484375,
|
|
"rewards/brier_reward": 0.8064169526100159,
|
|
"rewards/confidence_uniqueness_reward": 0.9598495483398437,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0021567588206380605,
|
|
"rewards/frontier_coverage_0": 0.08862596154212951,
|
|
"rewards/frontier_coverage_1": 0.08862596154212951,
|
|
"rewards/frontier_coverage_10": 0.08861215263605118,
|
|
"rewards/frontier_coverage_15": 0.0873618446290493,
|
|
"rewards/frontier_coverage_20": 0.07677424550056458,
|
|
"rewards/frontier_coverage_25": 0.04842212200164795,
|
|
"rewards/frontier_coverage_5": 0.08862596154212951,
|
|
"rewards/frontier_ece_reward": 0.0034100091550499203,
|
|
"rewards/frontier_entropy_batch_reward": -0.20297325849533082,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.086773681640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12210773676633835,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0433868408203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0433868408203125,
|
|
"signal/advantage_abs_mean": 0.061074144393205645,
|
|
"signal/advantage_pre_scale_abs_mean": 0.061074144393205645,
|
|
"signal/advantage_pre_scale_std": 0.09964745044708252,
|
|
"signal/advantage_std": 0.09964745044708252,
|
|
"signal/brier_reward/centered_abs_mean": 0.10820089429616928,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8640625,
|
|
"signal/brier_reward/group_std_mean": 0.1395553916692734,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010820089280605317,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010820089280605317,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012313938140869141,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.9390625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015382156148552895,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012313938699662686,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012313938699662686,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019083557184785605,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70859375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003126844298094511,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.385444749961607e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.385444749961607e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15149271190166474,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19624074995517732,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001893658982589841,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001893658982589841,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15149271190166474,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19624074995517732,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001893658982589841,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001893658982589841,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15089576244354247,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.874609375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1954701155424118,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018861971329897642,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018861971329897642,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14784342050552368,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.874609375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19153738617897034,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018480427097529173,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018480427097529173,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11549332290887833,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14993580281734467,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014436665922403335,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014436665922403335,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0574177585542202,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.91015625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07405912727117539,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007177219958975911,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007177219958975911,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15149271190166474,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19624074995517732,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001893658982589841,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001893658982589841,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006441084947437048,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.840625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009463933855295181,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006441084784455598,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006441084784455598,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27501477003097535,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3496582627296448,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027501478046178817,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027501478046178817,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25028817244424917,
|
|
"calibration/batch_distribution_entropy": 0.9743689912131274,
|
|
"calibration/batch_entropy_100bins": 0.9518182314907483,
|
|
"calibration/batch_entropy_10bins": 0.9743689912131274,
|
|
"calibration/batch_entropy_50bins": 0.9659027223777581,
|
|
"calibration/batch_uniqueness": 0.9613949453726036,
|
|
"calibration/buffer_distribution_entropy": 0.9989980442369921,
|
|
"calibration/buffer_entropy_100bins": 0.9914707354706082,
|
|
"calibration/buffer_entropy_10bins": 0.9989980442369921,
|
|
"calibration/buffer_entropy_50bins": 0.9953458202834676,
|
|
"calibration/confidence_entropy": 0.4951727113718006,
|
|
"calibration/coverage@0%": 0.007818615459882583,
|
|
"calibration/coverage@1%": 0.007818615459882583,
|
|
"calibration/coverage@10%": 0.11291050024461839,
|
|
"calibration/coverage@15%": 0.1961281494618395,
|
|
"calibration/coverage@20%": 0.4301194043542075,
|
|
"calibration/coverage@25%": 0.5449677409491194,
|
|
"calibration/coverage@30%": 0.6873287671232877,
|
|
"calibration/coverage@5%": 0.04455112524461839,
|
|
"calibration/ece": 0.13390157547439757,
|
|
"calibration/mean_confidence": 0.5367679906823735,
|
|
"calibration/prompt_uniqueness": 0.8548456197970864,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 970.4,
|
|
"completions/max_terminated_length": 763.0,
|
|
"completions/mean_length": 190.246484375,
|
|
"completions/mean_terminated_length": 190.1147430419922,
|
|
"completions/min_length": 85.4,
|
|
"completions/min_terminated_length": 85.4,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.0006948218797333539,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 771725442.0,
|
|
"reward": 0.9490613460540771,
|
|
"reward_std": 0.0752700299024582,
|
|
"rewards/accuracy_reward": 0.5681640625,
|
|
"rewards/brier_reward": 0.8009598612785339,
|
|
"rewards/confidence_uniqueness_reward": 0.9616155385971069,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002585971378721297,
|
|
"rewards/frontier_coverage_0": 0.09696303457021713,
|
|
"rewards/frontier_coverage_1": 0.09696303457021713,
|
|
"rewards/frontier_coverage_10": 0.09685205966234207,
|
|
"rewards/frontier_coverage_15": 0.09585188180208207,
|
|
"rewards/frontier_coverage_20": 0.08253547102212906,
|
|
"rewards/frontier_coverage_25": 0.05373050421476364,
|
|
"rewards/frontier_coverage_5": 0.09696303457021713,
|
|
"rewards/frontier_ece_reward": 0.0037560143042355775,
|
|
"rewards/frontier_entropy_batch_reward": -0.19320926070213318,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0760009765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.162890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10297959595918656,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03800048828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03800048828125,
|
|
"signal/advantage_abs_mean": 0.058193684369325635,
|
|
"signal/advantage_pre_scale_abs_mean": 0.058193684369325635,
|
|
"signal/advantage_pre_scale_std": 0.09324042946100235,
|
|
"signal/advantage_std": 0.09324042946100235,
|
|
"signal/brier_reward/centered_abs_mean": 0.11048106700181962,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84609375,
|
|
"signal/brier_reward/group_std_mean": 0.14282523095607758,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011048106662929057,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011048106662929057,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011998776532709598,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.912109375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015458272024989127,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011998776812106372,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011998776812106372,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023550010519102216,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004158449545502663,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9437512421282008e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9437512421282008e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15034229159355164,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19372088611125945,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018792787101119756,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018792787101119756,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15034229159355164,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19372088611125945,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018792787101119756,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018792787101119756,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1496051698923111,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1928351491689682,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001870064646936953,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001870064646936953,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1457270860671997,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86171875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18802883327007294,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018215886317193507,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018215886317193507,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.11060539782047271,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.860546875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1432916909456253,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013825674774125218,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013825674774125218,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05945408642292023,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0763387769460678,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007431761012412607,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007431761012412607,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15034229159355164,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8640625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19372088611125945,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018792787101119756,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018792787101119756,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006753822509199381,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.82734375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010070707648992538,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006753822672180831,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006753822672180831,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26861504912376405,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3452408015727997,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02686150446534157,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02686150446534157,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.256449338221134,
|
|
"calibration/batch_distribution_entropy": 0.968129743999475,
|
|
"calibration/batch_entropy_100bins": 0.9535105366339899,
|
|
"calibration/batch_entropy_10bins": 0.968129743999475,
|
|
"calibration/batch_entropy_50bins": 0.9658034715240916,
|
|
"calibration/batch_uniqueness": 0.959322589602819,
|
|
"calibration/buffer_distribution_entropy": 0.9990122658119412,
|
|
"calibration/buffer_entropy_100bins": 0.9914938682880787,
|
|
"calibration/buffer_entropy_10bins": 0.9990122658119412,
|
|
"calibration/buffer_entropy_50bins": 0.9952962057881454,
|
|
"calibration/confidence_entropy": 0.4566113101334025,
|
|
"calibration/coverage@0%": 0.014465355919765166,
|
|
"calibration/coverage@1%": 0.014465355919765166,
|
|
"calibration/coverage@10%": 0.17503516389432486,
|
|
"calibration/coverage@15%": 0.3086740154109589,
|
|
"calibration/coverage@20%": 0.44046370474559693,
|
|
"calibration/coverage@25%": 0.5280225660469667,
|
|
"calibration/coverage@30%": 0.6393850905088063,
|
|
"calibration/coverage@5%": 0.059005014677103715,
|
|
"calibration/ece": 0.13985793070933042,
|
|
"calibration/mean_confidence": 0.4696687177806269,
|
|
"calibration/prompt_uniqueness": 0.851551728342872,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 949.0,
|
|
"completions/max_terminated_length": 544.6,
|
|
"completions/mean_length": 188.6611328125,
|
|
"completions/mean_terminated_length": 188.26605224609375,
|
|
"completions/min_length": 87.2,
|
|
"completions/min_terminated_length": 87.2,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.0008416337659582496,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 788884532.0,
|
|
"reward": 0.9455557703971863,
|
|
"reward_std": 0.08068245649337769,
|
|
"rewards/accuracy_reward": 0.566015625,
|
|
"rewards/brier_reward": 0.7956305265426635,
|
|
"rewards/confidence_uniqueness_reward": 0.9623886108398437,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.003054162277840078,
|
|
"rewards/frontier_coverage_0": 0.10228811725974082,
|
|
"rewards/frontier_coverage_1": 0.10228811725974082,
|
|
"rewards/frontier_coverage_10": 0.10170512199401856,
|
|
"rewards/frontier_coverage_15": 0.09902632944285869,
|
|
"rewards/frontier_coverage_20": 0.07840342242270708,
|
|
"rewards/frontier_coverage_25": 0.050577325746417046,
|
|
"rewards/frontier_coverage_5": 0.10181083604693413,
|
|
"rewards/frontier_ece_reward": 0.0037714077159762384,
|
|
"rewards/frontier_entropy_batch_reward": -0.21397663354873658,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0837158203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11319768130779266,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04185791015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04185791015625,
|
|
"signal/advantage_abs_mean": 0.06239245980978012,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06239245980978012,
|
|
"signal/advantage_pre_scale_std": 0.10159500986337662,
|
|
"signal/advantage_std": 0.10159500986337662,
|
|
"signal/brier_reward/centered_abs_mean": 0.113985575735569,
|
|
"signal/brier_reward/group_bin_occupancy": 0.82890625,
|
|
"signal/brier_reward/group_std_mean": 0.14819374084472656,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011398557387292386,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011398557387292386,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012159938551485538,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91640625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01617111321538687,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012159939156845211,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012159939156845211,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029334662482142448,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72265625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00483027109876275,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.666832781163975e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.666832781163975e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15212544202804565,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.853125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19812886118888856,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019015680765733123,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019015680765733123,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15212544202804565,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.853125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19812886118888856,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019015680765733123,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019015680765733123,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15115560591220856,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.84921875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19689476490020752,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018894450971856714,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018894450971856714,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14616797864437103,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.84921875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1905912697315216,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001827099802903831,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001827099802903831,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10996298342943192,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.843359375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14416728615760804,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013745372649282216,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013745372649282216,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05818985775113106,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.911328125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07572825103998185,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007273732335306704,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007273732335306704,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15179600417613984,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.85078125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19772669970989226,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018974500941112637,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018974500941112637,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007248471491038799,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.81953125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010719313845038414,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007248471258208156,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007248471258208156,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2787540197372437,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.739453125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3564418852329254,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027875401824712754,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027875401824712754,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.299067613553852,
|
|
"calibration/batch_distribution_entropy": 0.9776422799775325,
|
|
"calibration/batch_entropy_100bins": 0.9544932057058523,
|
|
"calibration/batch_entropy_10bins": 0.9776422799775325,
|
|
"calibration/batch_entropy_50bins": 0.9740477455082059,
|
|
"calibration/batch_uniqueness": 0.9616119384765625,
|
|
"calibration/buffer_distribution_entropy": 0.9990121183367439,
|
|
"calibration/buffer_entropy_100bins": 0.9915586285603151,
|
|
"calibration/buffer_entropy_10bins": 0.9990121183367439,
|
|
"calibration/buffer_entropy_50bins": 0.9953215015311525,
|
|
"calibration/confidence_entropy": 0.5014430602492166,
|
|
"calibration/coverage@0%": 0.066015625,
|
|
"calibration/coverage@1%": 0.066015625,
|
|
"calibration/coverage@10%": 0.216015625,
|
|
"calibration/coverage@15%": 0.26328125,
|
|
"calibration/coverage@20%": 0.366015625,
|
|
"calibration/coverage@25%": 0.455859375,
|
|
"calibration/coverage@30%": 0.491015625,
|
|
"calibration/coverage@5%": 0.090625,
|
|
"calibration/ece": 0.16444165512890357,
|
|
"calibration/mean_confidence": 0.47969150508930003,
|
|
"calibration/prompt_uniqueness": 0.868505859375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 1003.0,
|
|
"completions/max_terminated_length": 624.2,
|
|
"completions/mean_length": 191.987890625,
|
|
"completions/mean_terminated_length": 191.72521362304687,
|
|
"completions/min_length": 80.8,
|
|
"completions/min_terminated_length": 80.8,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.001026144833303988,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 805783192.0,
|
|
"reward": 0.9215254068374634,
|
|
"reward_std": 0.07930080592632294,
|
|
"rewards/accuracy_reward": 0.51201171875,
|
|
"rewards/brier_reward": 0.8031093597412109,
|
|
"rewards/confidence_uniqueness_reward": 0.9619287371635437,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002836792590096593,
|
|
"rewards/frontier_coverage_0": 0.13882942795753478,
|
|
"rewards/frontier_coverage_1": 0.13882942795753478,
|
|
"rewards/frontier_coverage_10": 0.13846020698547362,
|
|
"rewards/frontier_coverage_15": 0.13496174067258834,
|
|
"rewards/frontier_coverage_20": 0.10879542678594589,
|
|
"rewards/frontier_coverage_25": 0.0587244875729084,
|
|
"rewards/frontier_coverage_5": 0.13846020698547362,
|
|
"rewards/frontier_ece_reward": 0.003680743183940649,
|
|
"rewards/frontier_entropy_batch_reward": -0.21932466328144073,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.079571533203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.166015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10974450260400773,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0397857666015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0397857666015625,
|
|
"signal/advantage_abs_mean": 0.0608487643301487,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0608487643301487,
|
|
"signal/advantage_pre_scale_std": 0.09865092337131501,
|
|
"signal/advantage_std": 0.09865092337131501,
|
|
"signal/brier_reward/centered_abs_mean": 0.10833943039178848,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8515625,
|
|
"signal/brier_reward/group_std_mean": 0.14004457592964173,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010833943635225296,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010833943635225296,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012927094288170338,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.91015625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016739430651068688,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001292709424160421,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001292709424160421,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024011209141463043,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7234375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003994084335863591,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.001401055371389e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.001401055371389e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15316719114780425,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19865505993366242,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001914589968509972,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001914589968509972,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15316719114780425,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.879296875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19865505993366242,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001914589968509972,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001914589968509972,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15222469270229338,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19741056561470033,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001902808714658022,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001902808714658022,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1450010806322098,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87578125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18793485462665557,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018125135218724608,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018125135218724608,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10850205421447753,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.874609375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.14055884182453154,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013562757056206464,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013562757056206464,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05681398212909698,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.921875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07298188954591751,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007101748022250831,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007101748022250831,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15222469270229338,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.880859375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19741056561470033,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001902808714658022,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001902808714658022,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006277401559054851,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.837109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009239476174116135,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006277402047999203,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006277402047999203,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2777975261211395,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35362735390663147,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027779752761125563,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027779752761125563,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32235280813281475,
|
|
"calibration/batch_distribution_entropy": 0.9786483096457314,
|
|
"calibration/batch_entropy_100bins": 0.9500265876570528,
|
|
"calibration/batch_entropy_10bins": 0.9786483096457314,
|
|
"calibration/batch_entropy_50bins": 0.9700209996178053,
|
|
"calibration/batch_uniqueness": 0.9649993896484375,
|
|
"calibration/buffer_distribution_entropy": 0.9989889119809028,
|
|
"calibration/buffer_entropy_100bins": 0.9915393121863978,
|
|
"calibration/buffer_entropy_10bins": 0.9989889119809028,
|
|
"calibration/buffer_entropy_50bins": 0.9953476043016061,
|
|
"calibration/confidence_entropy": 0.4872507579959541,
|
|
"calibration/coverage@0%": 0.016796875,
|
|
"calibration/coverage@1%": 0.016796875,
|
|
"calibration/coverage@10%": 0.153515625,
|
|
"calibration/coverage@15%": 0.271875,
|
|
"calibration/coverage@20%": 0.31484375,
|
|
"calibration/coverage@25%": 0.343359375,
|
|
"calibration/coverage@30%": 0.3921875,
|
|
"calibration/coverage@5%": 0.114453125,
|
|
"calibration/ece": 0.15300725756631883,
|
|
"calibration/mean_confidence": 0.509216670802342,
|
|
"calibration/prompt_uniqueness": 0.8611328125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 1067.4,
|
|
"completions/max_terminated_length": 785.6,
|
|
"completions/mean_length": 189.0556640625,
|
|
"completions/mean_terminated_length": 188.79233093261718,
|
|
"completions/min_length": 81.2,
|
|
"completions/min_terminated_length": 81.2,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.0008188265492208302,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 822893490.0,
|
|
"reward": 0.9416593551635742,
|
|
"reward_std": 0.08285669684410095,
|
|
"rewards/accuracy_reward": 0.5642578125,
|
|
"rewards/brier_reward": 0.7843694448471069,
|
|
"rewards/confidence_uniqueness_reward": 0.9650574326515198,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.003067029034718871,
|
|
"rewards/frontier_coverage_0": 0.08356368988752365,
|
|
"rewards/frontier_coverage_1": 0.08356368988752365,
|
|
"rewards/frontier_coverage_10": 0.08326268717646598,
|
|
"rewards/frontier_coverage_15": 0.07886564061045646,
|
|
"rewards/frontier_coverage_20": 0.06277668662369251,
|
|
"rewards/frontier_coverage_25": 0.044081108272075654,
|
|
"rewards/frontier_coverage_5": 0.08318910598754883,
|
|
"rewards/frontier_ece_reward": 0.002441513957455754,
|
|
"rewards/frontier_entropy_batch_reward": -0.22011671662330629,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09090576171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.12054677605628968,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045452880859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045452880859375,
|
|
"signal/advantage_abs_mean": 0.06448552757501602,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06448552757501602,
|
|
"signal/advantage_pre_scale_std": 0.10121935606002808,
|
|
"signal/advantage_std": 0.10121935606002808,
|
|
"signal/brier_reward/centered_abs_mean": 0.1141832172870636,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8546875,
|
|
"signal/brier_reward/group_std_mean": 0.14592179358005525,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011418322287499904,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011418322287499904,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012644784711301326,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8859375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01663502026349306,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012644784990698099,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012644784990698099,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026621847413480283,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728515625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0041458617430180315,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.327731028548442e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.327731028548442e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1563648372888565,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.872265625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.200226292014122,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001954560517333448,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001954560517333448,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1563648372888565,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.872265625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.200226292014122,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001954560517333448,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001954560517333448,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1553775906562805,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.87265625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19900963306427003,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019422198878601194,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019422198878601194,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1479180335998535,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18967563509941102,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018489754293113947,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018489754293113947,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10082450807094574,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.866015625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1297900453209877,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001260306383483112,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001260306383483112,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.055328131467103955,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.928125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07085389196872711,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006916016573086381,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006916016573086381,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15557830333709716,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.87265625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19926558434963226,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001944728777743876,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001944728777743876,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006715606153011322,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.83828125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009772182628512382,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006715606432408094,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006715606432408094,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2859824955463409,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3597340643405914,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028598250076174735,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028598250076174735,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21915763575922814,
|
|
"calibration/batch_distribution_entropy": 0.9810695570789931,
|
|
"calibration/batch_entropy_100bins": 0.9486454306483759,
|
|
"calibration/batch_entropy_10bins": 0.9810695570789931,
|
|
"calibration/batch_entropy_50bins": 0.9722712681327159,
|
|
"calibration/batch_uniqueness": 0.965252685546875,
|
|
"calibration/buffer_distribution_entropy": 0.9990129339746321,
|
|
"calibration/buffer_entropy_100bins": 0.9912903876248045,
|
|
"calibration/buffer_entropy_10bins": 0.9990129339746321,
|
|
"calibration/buffer_entropy_50bins": 0.9953314983930802,
|
|
"calibration/confidence_entropy": 0.4940070201234362,
|
|
"calibration/coverage@0%": 0.041015625,
|
|
"calibration/coverage@1%": 0.041015625,
|
|
"calibration/coverage@10%": 0.23515625,
|
|
"calibration/coverage@15%": 0.365625,
|
|
"calibration/coverage@20%": 0.521484375,
|
|
"calibration/coverage@25%": 0.6296875,
|
|
"calibration/coverage@30%": 0.7234375,
|
|
"calibration/coverage@5%": 0.091015625,
|
|
"calibration/ece": 0.1043095249640625,
|
|
"calibration/mean_confidence": 0.5099823944953126,
|
|
"calibration/prompt_uniqueness": 0.863818359375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 913.4,
|
|
"completions/max_terminated_length": 492.4,
|
|
"completions/mean_length": 185.3513671875,
|
|
"completions/mean_terminated_length": 184.95630493164063,
|
|
"completions/min_length": 88.0,
|
|
"completions/min_terminated_length": 88.0,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.0009886363986879587,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 839802048.0,
|
|
"reward": 0.9549939274787903,
|
|
"reward_std": 0.08174641579389572,
|
|
"rewards/accuracy_reward": 0.59150390625,
|
|
"rewards/brier_reward": 0.809298062324524,
|
|
"rewards/confidence_uniqueness_reward": 0.9655136346817017,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.00310264159925282,
|
|
"rewards/frontier_coverage_0": 0.08750025108456612,
|
|
"rewards/frontier_coverage_1": 0.08750025108456612,
|
|
"rewards/frontier_coverage_10": 0.08737820237874985,
|
|
"rewards/frontier_coverage_15": 0.08418880626559258,
|
|
"rewards/frontier_coverage_20": 0.06368328407406806,
|
|
"rewards/frontier_coverage_25": 0.04929944053292275,
|
|
"rewards/frontier_coverage_5": 0.08737820237874985,
|
|
"rewards/frontier_ece_reward": 0.0034091237001121046,
|
|
"rewards/frontier_entropy_batch_reward": -0.25231444239616396,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084820556640625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.165625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11230973601341247,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0424102783203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0424102783203125,
|
|
"signal/advantage_abs_mean": 0.0636213093996048,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0636213093996048,
|
|
"signal/advantage_pre_scale_std": 0.10311011075973511,
|
|
"signal/advantage_std": 0.10311011075973511,
|
|
"signal/brier_reward/centered_abs_mean": 0.1033732384443283,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84375,
|
|
"signal/brier_reward/group_std_mean": 0.1343769446015358,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010337324067950248,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010337324067950248,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013043990544974803,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87109375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017386937327682973,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001304399105720222,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001304399105720222,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00283603323623538,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.704296875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0045263932552188635,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.545041545294225e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.545041545294225e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13621854037046432,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8578125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17529793679714203,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017027317779138684,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017027317779138684,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13621854037046432,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8578125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17529793679714203,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017027317779138684,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017027317779138684,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13600390702486037,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.858203125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17503868341445922,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017000488704070448,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017000488704070448,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12785129249095917,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.859765625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16478919386863708,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015981412259861826,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015981412259861826,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08313901722431183,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.868359375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10833462625741959,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00103923772694543,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00103923772694543,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05012721195816994,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.926953125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06447599828243256,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006265901494771243,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006265901494771243,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13600390702486037,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.858203125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17503868341445922,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017000488704070448,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017000488704070448,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007046621013432741,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.829296875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010701733268797397,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000704662105999887,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000704662105999887,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29779070019721987,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7203125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36754211187362673,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02977906949818134,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02977906949818134,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.4222622649766603,
|
|
"eval_calibration/batch_distribution_entropy": 0.93831284978888,
|
|
"eval_calibration/batch_entropy_100bins": 0.7008354586552061,
|
|
"eval_calibration/batch_entropy_10bins": 0.93831284978888,
|
|
"eval_calibration/batch_entropy_50bins": 0.7678645493443049,
|
|
"eval_calibration/batch_uniqueness": 0.9052734375,
|
|
"eval_calibration/buffer_distribution_entropy": 0.999063500343571,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9911236376097536,
|
|
"eval_calibration/buffer_entropy_10bins": 0.999063500343571,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9954196166686817,
|
|
"eval_calibration/confidence_entropy": 0.48469888985633597,
|
|
"eval_calibration/coverage@0%": 0.0703125,
|
|
"eval_calibration/coverage@1%": 0.0703125,
|
|
"eval_calibration/coverage@10%": 0.0703125,
|
|
"eval_calibration/coverage@15%": 0.0703125,
|
|
"eval_calibration/coverage@20%": 0.0703125,
|
|
"eval_calibration/coverage@25%": 0.140625,
|
|
"eval_calibration/coverage@30%": 0.4296875,
|
|
"eval_calibration/coverage@5%": 0.0703125,
|
|
"eval_calibration/ece": 0.193647390625,
|
|
"eval_calibration/mean_confidence": 0.47742710937499994,
|
|
"eval_calibration/prompt_uniqueness": 0.9052734375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 417.75,
|
|
"eval_completions/max_terminated_length": 417.75,
|
|
"eval_completions/mean_length": 189.96800994873047,
|
|
"eval_completions/mean_terminated_length": 189.96800994873047,
|
|
"eval_completions/min_length": 99.5,
|
|
"eval_completions/min_terminated_length": 99.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 839802048.0,
|
|
"eval_reward": 0.8031501024961472,
|
|
"eval_reward_std": 0.2302834540605545,
|
|
"eval_rewards/accuracy_reward": 0.43359375,
|
|
"eval_rewards/brier_reward": 0.8097970336675644,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.909912109375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0030607732478529215,
|
|
"eval_rewards/frontier_coverage_0": 0.19227121397852898,
|
|
"eval_rewards/frontier_coverage_1": 0.19227121397852898,
|
|
"eval_rewards/frontier_coverage_10": 0.19090014696121216,
|
|
"eval_rewards/frontier_coverage_15": 0.17611178383231163,
|
|
"eval_rewards/frontier_coverage_20": 0.11594182625412941,
|
|
"eval_rewards/frontier_coverage_25": 0.05809914506971836,
|
|
"eval_rewards/frontier_coverage_5": 0.19090014696121216,
|
|
"eval_rewards/frontier_ece_reward": 0.0046437275595963,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 21.1941,
|
|
"eval_samples_per_second": 23.592,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.474853515625,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4946432411670685,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2374267578125,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2374267578125,
|
|
"eval_signal/advantage_abs_mean": 0.21750148758292198,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21750148758292198,
|
|
"eval_signal/advantage_pre_scale_std": 0.22778696939349174,
|
|
"eval_signal/advantage_std": 0.22778696939349174,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.17260025814175606,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.8671875,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2234898954629898,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01726002711802721,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01726002711802721,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0348052978515625,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.34375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04058399423956871,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034805297618731856,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034805297618731856,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0038759367307648063,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.7421875,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.006893252138979733,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.844920840696432e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.844920840696432e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.34364357590675354,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.41618141531944275,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004295544931665063,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004295544931665063,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.34364357590675354,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.41618141531944275,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004295544931665063,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004295544931665063,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3414214551448822,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4136466532945633,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004267768119461834,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004267768119461834,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3166361153125763,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.3851661831140518,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003957951499614865,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003957951499614865,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.19747909903526306,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9140625,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.24594665691256523,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024684888776391745,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024684888776391745,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08342637866735458,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.10634090937674046,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010428297682665288,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010428297682665288,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3414214551448822,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4136466532945633,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004267768119461834,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004267768119461834,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.00797420903109014,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.90625,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.011368014384061098,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007974208710948005,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007974208710948005,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.189,
|
|
"step": 250
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23524356922838585,
|
|
"calibration/batch_distribution_entropy": 0.9748226569066292,
|
|
"calibration/batch_entropy_100bins": 0.9464532567711041,
|
|
"calibration/batch_entropy_10bins": 0.9748226569066292,
|
|
"calibration/batch_entropy_50bins": 0.9691353813672773,
|
|
"calibration/batch_uniqueness": 0.9648882276804489,
|
|
"calibration/buffer_distribution_entropy": 0.9989431393987553,
|
|
"calibration/buffer_entropy_100bins": 0.9907328547968023,
|
|
"calibration/buffer_entropy_10bins": 0.9989431393987553,
|
|
"calibration/buffer_entropy_50bins": 0.9952267925200537,
|
|
"calibration/confidence_entropy": 0.48285694613510166,
|
|
"calibration/coverage@0%": 0.0140625,
|
|
"calibration/coverage@1%": 0.0140625,
|
|
"calibration/coverage@10%": 0.10546875,
|
|
"calibration/coverage@15%": 0.2125,
|
|
"calibration/coverage@20%": 0.3839920193248532,
|
|
"calibration/coverage@25%": 0.6714377446183952,
|
|
"calibration/coverage@30%": 0.798828125,
|
|
"calibration/coverage@5%": 0.03046875,
|
|
"calibration/ece": 0.13733046909174487,
|
|
"calibration/mean_confidence": 0.5263182425337444,
|
|
"calibration/prompt_uniqueness": 0.8637435674915451,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 753.4,
|
|
"completions/max_terminated_length": 585.2,
|
|
"completions/mean_length": 182.904296875,
|
|
"completions/mean_terminated_length": 182.77190551757812,
|
|
"completions/min_length": 88.8,
|
|
"completions/min_terminated_length": 88.8,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.001030449173413217,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 856774156.0,
|
|
"reward": 0.9531601071357727,
|
|
"reward_std": 0.08292276561260223,
|
|
"rewards/accuracy_reward": 0.59013671875,
|
|
"rewards/brier_reward": 0.7899926781654358,
|
|
"rewards/confidence_uniqueness_reward": 0.9667759299278259,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0026666073594242335,
|
|
"rewards/frontier_coverage_0": 0.06293640360236168,
|
|
"rewards/frontier_coverage_1": 0.06293640360236168,
|
|
"rewards/frontier_coverage_10": 0.06290345415472984,
|
|
"rewards/frontier_coverage_15": 0.06183330789208412,
|
|
"rewards/frontier_coverage_20": 0.052926937490701674,
|
|
"rewards/frontier_coverage_25": 0.04199915751814842,
|
|
"rewards/frontier_coverage_5": 0.06285227611660957,
|
|
"rewards/frontier_ece_reward": 0.002211177465505898,
|
|
"rewards/frontier_entropy_batch_reward": -0.22828937768936158,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.090277099609375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11830563694238663,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451385498046875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451385498046875,
|
|
"signal/advantage_abs_mean": 0.06491014659404755,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06491014659404755,
|
|
"signal/advantage_pre_scale_std": 0.1037605032324791,
|
|
"signal/advantage_std": 0.1037605032324791,
|
|
"signal/brier_reward/centered_abs_mean": 0.11094661056995392,
|
|
"signal/brier_reward/group_bin_occupancy": 0.86171875,
|
|
"signal/brier_reward/group_std_mean": 0.1411285489797592,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011094661056995391,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011094661056995391,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012302939221262932,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.869140625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01582129541784525,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012302939547225833,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012302939547225833,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002399337338283658,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.738671875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0037662754766643047,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9991718110977673e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9991718110977673e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1482946664094925,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19012218713760376,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001853683264926076,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001853683264926076,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1482946664094925,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19012218713760376,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001853683264926076,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001853683264926076,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14743364751338958,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18905034363269807,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018429205985739828,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018429205985739828,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13868501037359238,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86484375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17817612886428832,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017335626529529692,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017335626529529692,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08847524970769882,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11435707211494446,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011059406446292997,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011059406446292997,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05190168023109436,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.920703125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06661412790417671,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006487710168585181,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006487710168585181,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1475912719964981,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18924154639244078,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018448908813297749,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018448908813297749,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006282018590718507,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.843359375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009374569542706013,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006282018381170929,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006282018381170929,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29568083882331847,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.717578125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3687551856040955,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029568084701895713,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029568084701895713,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2818856091214798,
|
|
"calibration/batch_distribution_entropy": 0.9755369371648703,
|
|
"calibration/batch_entropy_100bins": 0.9477719023713969,
|
|
"calibration/batch_entropy_10bins": 0.9755369371648703,
|
|
"calibration/batch_entropy_50bins": 0.9714799065497696,
|
|
"calibration/batch_uniqueness": 0.9651092529296875,
|
|
"calibration/buffer_distribution_entropy": 0.9988617474514427,
|
|
"calibration/buffer_entropy_100bins": 0.9901376392375262,
|
|
"calibration/buffer_entropy_10bins": 0.9988617474514427,
|
|
"calibration/buffer_entropy_50bins": 0.9950786388567818,
|
|
"calibration/confidence_entropy": 0.49679534425027755,
|
|
"calibration/coverage@0%": 0.039453125,
|
|
"calibration/coverage@1%": 0.039453125,
|
|
"calibration/coverage@10%": 0.203125,
|
|
"calibration/coverage@15%": 0.248046875,
|
|
"calibration/coverage@20%": 0.32890625,
|
|
"calibration/coverage@25%": 0.432421875,
|
|
"calibration/coverage@30%": 0.51171875,
|
|
"calibration/coverage@5%": 0.160546875,
|
|
"calibration/ece": 0.11443988386113282,
|
|
"calibration/mean_confidence": 0.4846168869722266,
|
|
"calibration/prompt_uniqueness": 0.867236328125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 855.6,
|
|
"completions/max_terminated_length": 669.8,
|
|
"completions/mean_length": 183.81455078125,
|
|
"completions/mean_terminated_length": 183.682763671875,
|
|
"completions/min_length": 81.4,
|
|
"completions/min_terminated_length": 81.4,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.0009330803877674043,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 873664769.0,
|
|
"reward": 0.9404654026031494,
|
|
"reward_std": 0.08173245638608932,
|
|
"rewards/accuracy_reward": 0.55751953125,
|
|
"rewards/brier_reward": 0.8090359687805175,
|
|
"rewards/confidence_uniqueness_reward": 0.9651769518852233,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002471396827604622,
|
|
"rewards/frontier_coverage_0": 0.11028219759464264,
|
|
"rewards/frontier_coverage_1": 0.11028219759464264,
|
|
"rewards/frontier_coverage_10": 0.10962048023939133,
|
|
"rewards/frontier_coverage_15": 0.10224549621343612,
|
|
"rewards/frontier_coverage_20": 0.07180028259754181,
|
|
"rewards/frontier_coverage_25": 0.053031648695468905,
|
|
"rewards/frontier_coverage_5": 0.10962048023939133,
|
|
"rewards/frontier_ece_reward": 0.0031542435754090548,
|
|
"rewards/frontier_entropy_batch_reward": -0.24287400245666504,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.090777587890625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.167578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11992976069450378,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0453887939453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0453887939453125,
|
|
"signal/advantage_abs_mean": 0.06406652480363846,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06406652480363846,
|
|
"signal/advantage_pre_scale_std": 0.10220163762569427,
|
|
"signal/advantage_std": 0.10220163762569427,
|
|
"signal/brier_reward/centered_abs_mean": 0.10323716104030609,
|
|
"signal/brier_reward/group_bin_occupancy": 0.856640625,
|
|
"signal/brier_reward/group_std_mean": 0.1322301909327507,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010323716327548027,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010323716327548027,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012594187259674072,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.855078125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016454468481242657,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001259418693371117,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001259418693371117,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002055089036002755,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.741796875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003259465633891523,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5688614914542996e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5688614914542996e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14981609880924224,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19165619909763337,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018727012909948825,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018727012909948825,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14981609880924224,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.86796875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19165619909763337,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018727012909948825,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018727012909948825,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14883655905723572,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19037957787513732,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001860457076691091,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001860457076691091,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13789782375097276,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17621307969093322,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017237228574231267,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017237228574231267,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08962543904781342,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.87890625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11485566943883896,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011203179834410547,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011203179834410547,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.052447068691253665,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9203125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06672648042440414,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006555883679538965,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006555883679538965,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14883655905723572,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19037957787513732,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001860457076691091,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001860457076691091,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006253997515887022,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.85625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009096156992018222,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006253997562453151,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006253997562453151,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.290888249874115,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71484375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3610431671142578,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029088825359940527,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029088825359940527,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3199682212623704,
|
|
"calibration/batch_distribution_entropy": 0.9698417146444305,
|
|
"calibration/batch_entropy_100bins": 0.9435716120808323,
|
|
"calibration/batch_entropy_10bins": 0.9698417146444305,
|
|
"calibration/batch_entropy_50bins": 0.9666622314375889,
|
|
"calibration/batch_uniqueness": 0.96644287109375,
|
|
"calibration/buffer_distribution_entropy": 0.9989431015829364,
|
|
"calibration/buffer_entropy_100bins": 0.9895180210867552,
|
|
"calibration/buffer_entropy_10bins": 0.9989431015829364,
|
|
"calibration/buffer_entropy_50bins": 0.9950208033407677,
|
|
"calibration/confidence_entropy": 0.4952000686126718,
|
|
"calibration/coverage@0%": 0.037109375,
|
|
"calibration/coverage@1%": 0.04140625,
|
|
"calibration/coverage@10%": 0.1703125,
|
|
"calibration/coverage@15%": 0.258984375,
|
|
"calibration/coverage@20%": 0.419140625,
|
|
"calibration/coverage@25%": 0.47734375,
|
|
"calibration/coverage@30%": 0.519921875,
|
|
"calibration/coverage@5%": 0.082421875,
|
|
"calibration/ece": 0.1614252955334596,
|
|
"calibration/mean_confidence": 0.5468318580334597,
|
|
"calibration/prompt_uniqueness": 0.86171875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 783.8,
|
|
"completions/max_terminated_length": 615.2,
|
|
"completions/mean_length": 182.1521484375,
|
|
"completions/mean_terminated_length": 182.01974487304688,
|
|
"completions/min_length": 84.8,
|
|
"completions/min_terminated_length": 84.8,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.0010821467731148005,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 890544375.0,
|
|
"reward": 0.930538809299469,
|
|
"reward_std": 0.08106714338064194,
|
|
"rewards/accuracy_reward": 0.5341796875,
|
|
"rewards/brier_reward": 0.8012025237083436,
|
|
"rewards/confidence_uniqueness_reward": 0.9661448240280152,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.003271967126056552,
|
|
"rewards/frontier_coverage_0": 0.11554919332265853,
|
|
"rewards/frontier_coverage_1": 0.11554919332265853,
|
|
"rewards/frontier_coverage_10": 0.11485711932182312,
|
|
"rewards/frontier_coverage_15": 0.11126702874898911,
|
|
"rewards/frontier_coverage_20": 0.0689346432685852,
|
|
"rewards/frontier_coverage_25": 0.04922807216644287,
|
|
"rewards/frontier_coverage_5": 0.11537581384181976,
|
|
"rewards/frontier_ece_reward": 0.0030869925394654274,
|
|
"rewards/frontier_entropy_batch_reward": -0.22139265537261962,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08153076171875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11394334435462952,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040765380859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.040765380859375,
|
|
"signal/advantage_abs_mean": 0.061507892608642575,
|
|
"signal/advantage_pre_scale_abs_mean": 0.061507892608642575,
|
|
"signal/advantage_pre_scale_std": 0.10090996772050857,
|
|
"signal/advantage_std": 0.10090996772050857,
|
|
"signal/brier_reward/centered_abs_mean": 0.10592394173145295,
|
|
"signal/brier_reward/group_bin_occupancy": 0.841796875,
|
|
"signal/brier_reward/group_std_mean": 0.13790196180343628,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01059239376336336,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01059239376336336,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012609278596937657,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.865234375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016177338361740113,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012609278550371529,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012609278550371529,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029984854627400637,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721875,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0048636754509061575,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7481067192857156e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7481067192857156e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1415121629834175,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.85234375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18623048067092896,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0017689020838588475,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0017689020838588475,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1415121629834175,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18623048067092896,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0017689020838588475,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0017689020838588475,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14053474068641664,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.851171875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18495951294898988,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017566842725500464,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017566842725500464,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1342177927494049,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17671539783477783,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016777224140241743,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016777224140241743,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08487182259559631,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88359375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11134538352489472,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010608977987430989,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010608977987430989,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05198915079236031,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.91640625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06686145663261414,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000649864412844181,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000649864412844181,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14084831327199937,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.851171875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18535879552364348,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017606039065867663,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017606039065867663,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0055714274756610395,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.877734375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007441604882478714,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005571427405811846,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005571427405811846,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2792421877384186,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71484375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35623074769973756,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.027924218401312827,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027924218401312827,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26688358629600345,
|
|
"calibration/batch_distribution_entropy": 0.956438995609551,
|
|
"calibration/batch_entropy_100bins": 0.9302209820641183,
|
|
"calibration/batch_entropy_10bins": 0.956438995609551,
|
|
"calibration/batch_entropy_50bins": 0.9543006144858946,
|
|
"calibration/batch_uniqueness": 0.96548082464166,
|
|
"calibration/buffer_distribution_entropy": 0.9990004575823258,
|
|
"calibration/buffer_entropy_100bins": 0.9889268859990621,
|
|
"calibration/buffer_entropy_10bins": 0.9990004575823258,
|
|
"calibration/buffer_entropy_50bins": 0.99498162743099,
|
|
"calibration/confidence_entropy": 0.48691284355710457,
|
|
"calibration/coverage@0%": 0.01328125,
|
|
"calibration/coverage@1%": 0.01328125,
|
|
"calibration/coverage@10%": 0.168359375,
|
|
"calibration/coverage@15%": 0.240234375,
|
|
"calibration/coverage@20%": 0.323828125,
|
|
"calibration/coverage@25%": 0.4125,
|
|
"calibration/coverage@30%": 0.5446076932485322,
|
|
"calibration/coverage@5%": 0.084375,
|
|
"calibration/ece": 0.1455257910339714,
|
|
"calibration/mean_confidence": 0.6091981775937194,
|
|
"calibration/prompt_uniqueness": 0.8710765763202393,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 1098.6,
|
|
"completions/max_terminated_length": 918.8,
|
|
"completions/mean_length": 181.7109375,
|
|
"completions/mean_terminated_length": 181.44696960449218,
|
|
"completions/min_length": 86.0,
|
|
"completions/min_terminated_length": 86.0,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.0010378322331234813,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 907391911.0,
|
|
"reward": 0.9530243515968323,
|
|
"reward_std": 0.08318499326705933,
|
|
"rewards/accuracy_reward": 0.59248046875,
|
|
"rewards/brier_reward": 0.7973593950271607,
|
|
"rewards/confidence_uniqueness_reward": 0.9641671776771545,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002714644838124514,
|
|
"rewards/frontier_coverage_0": 0.07732260525226593,
|
|
"rewards/frontier_coverage_1": 0.07732260525226593,
|
|
"rewards/frontier_coverage_10": 0.07700667977333069,
|
|
"rewards/frontier_coverage_15": 0.0751921996474266,
|
|
"rewards/frontier_coverage_20": 0.056314506381750104,
|
|
"rewards/frontier_coverage_25": 0.05060553103685379,
|
|
"rewards/frontier_coverage_5": 0.0774739071726799,
|
|
"rewards/frontier_ece_reward": 0.002765231346711516,
|
|
"rewards/frontier_entropy_batch_reward": -0.2565395474433899,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.090386962890625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.166796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11810308396816253,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451934814453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0451934814453125,
|
|
"signal/advantage_abs_mean": 0.0654950737953186,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0654950737953186,
|
|
"signal/advantage_pre_scale_std": 0.10269584357738495,
|
|
"signal/advantage_std": 0.10269584357738495,
|
|
"signal/brier_reward/centered_abs_mean": 0.10956264287233353,
|
|
"signal/brier_reward/group_bin_occupancy": 0.851171875,
|
|
"signal/brier_reward/group_std_mean": 0.1409148782491684,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010956264473497868,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010956264473497868,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014184213988482953,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.834765625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.018567436560988426,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014184214174747466,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014184214174747466,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027129411697387694,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0043897018767893314,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.391176614968572e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.391176614968572e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1481944888830185,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8671875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18974127769470214,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018524311250075697,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018524311250075697,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1481944888830185,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8671875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18974127769470214,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018524311250075697,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018524311250075697,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14702675938606263,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1882396310567856,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018378345994278789,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018378345994278789,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14057752192020417,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.858203125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1800085186958313,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017572190146893263,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017572190146893263,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08514007031917573,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10977080911397934,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010642508743330837,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010642508743330837,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05421077758073807,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.925,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06963766515254974,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006776347407139837,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006776347407139837,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1480186551809311,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.866015625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18951984047889708,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018502332037314772,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018502332037314772,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005847407225519419,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.850390625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007833207491785288,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005847407272085547,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005847407272085547,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3008589863777161,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.715625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.371851509809494,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03008589893579483,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03008589893579483,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3599908256968588,
|
|
"calibration/batch_distribution_entropy": 0.9765293546573472,
|
|
"calibration/batch_entropy_100bins": 0.9479550555184086,
|
|
"calibration/batch_entropy_10bins": 0.9765293546573472,
|
|
"calibration/batch_entropy_50bins": 0.9718896673551072,
|
|
"calibration/batch_uniqueness": 0.9620710457371701,
|
|
"calibration/buffer_distribution_entropy": 0.9989072670831562,
|
|
"calibration/buffer_entropy_100bins": 0.9881814276419251,
|
|
"calibration/buffer_entropy_10bins": 0.9989072670831562,
|
|
"calibration/buffer_entropy_50bins": 0.9947699849601171,
|
|
"calibration/confidence_entropy": 0.4855467010850557,
|
|
"calibration/coverage@0%": 0.016816750244618393,
|
|
"calibration/coverage@1%": 0.016816750244618393,
|
|
"calibration/coverage@10%": 0.02775807240704501,
|
|
"calibration/coverage@15%": 0.06486897627201565,
|
|
"calibration/coverage@20%": 0.1316910775440313,
|
|
"calibration/coverage@25%": 0.2810864114481409,
|
|
"calibration/coverage@30%": 0.3869679549902153,
|
|
"calibration/coverage@5%": 0.016816750244618393,
|
|
"calibration/ece": 0.13908466566091793,
|
|
"calibration/mean_confidence": 0.47106601660332437,
|
|
"calibration/prompt_uniqueness": 0.851669403616025,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 730.0,
|
|
"completions/max_terminated_length": 618.6,
|
|
"completions/mean_length": 175.8171875,
|
|
"completions/mean_terminated_length": 175.41910095214843,
|
|
"completions/min_length": 77.6,
|
|
"completions/min_terminated_length": 77.6,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.0011793546145781875,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 924339351.0,
|
|
"reward": 0.9199577450752259,
|
|
"reward_std": 0.08229250609874725,
|
|
"rewards/accuracy_reward": 0.5189453125,
|
|
"rewards/brier_reward": 0.7955085277557373,
|
|
"rewards/confidence_uniqueness_reward": 0.9621264696121216,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.002559547405689955,
|
|
"rewards/frontier_coverage_0": 0.12974209040403367,
|
|
"rewards/frontier_coverage_1": 0.12974209040403367,
|
|
"rewards/frontier_coverage_10": 0.12853662222623824,
|
|
"rewards/frontier_coverage_15": 0.12217865586280822,
|
|
"rewards/frontier_coverage_20": 0.0753389410674572,
|
|
"rewards/frontier_coverage_25": 0.05095566734671593,
|
|
"rewards/frontier_coverage_5": 0.12889423370361328,
|
|
"rewards/frontier_ece_reward": 0.0026560436934232714,
|
|
"rewards/frontier_entropy_batch_reward": -0.24932879209518433,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09031982421875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11694404035806656,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045159912109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045159912109375,
|
|
"signal/advantage_abs_mean": 0.06529273688793183,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06529273688793183,
|
|
"signal/advantage_pre_scale_std": 0.102424056828022,
|
|
"signal/advantage_std": 0.102424056828022,
|
|
"signal/brier_reward/centered_abs_mean": 0.1097784698009491,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84375,
|
|
"signal/brier_reward/group_std_mean": 0.14118833541870118,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010977847129106521,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010977847129106521,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014931019768118859,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.850390625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019864151254296303,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014931020326912404,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014931020326912404,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814434766769,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021499829599633813,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.725390625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034981849137693645,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6874786999542267e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6874786999542267e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.162190243601799,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.872265625,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.2070352703332901,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020273780450224877,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020273780450224877,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.162190243601799,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.872265625,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2070352703332901,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020273780450224877,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020273780450224877,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16074737310409545,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.872265625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2051818400621414,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002009342284873128,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002009342284873128,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15363400876522065,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19600152373313903,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019204251701012253,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019204251701012253,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09273725599050522,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88359375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1185536801815033,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011592157417908311,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011592157417908311,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.053829978406429294,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9140625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06915899068117141,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006728747393935919,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006728747393935919,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16112555861473082,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.872265625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2056680828332901,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00201406953856349,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00201406953856349,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00555073544383049,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.85859375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007629283983260393,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005550735630095005,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005550735630095005,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2958798289299011,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.71875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37051703929901125,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029587984085083008,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029587984085083008,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3397437648873166,
|
|
"calibration/batch_distribution_entropy": 0.9796197507037563,
|
|
"calibration/batch_entropy_100bins": 0.951087428797685,
|
|
"calibration/batch_entropy_10bins": 0.9796197507037563,
|
|
"calibration/batch_entropy_50bins": 0.9718575724598498,
|
|
"calibration/batch_uniqueness": 0.965020751953125,
|
|
"calibration/buffer_distribution_entropy": 0.9988991354973707,
|
|
"calibration/buffer_entropy_100bins": 0.9878559564647965,
|
|
"calibration/buffer_entropy_10bins": 0.9988991354973707,
|
|
"calibration/buffer_entropy_50bins": 0.9948360874758867,
|
|
"calibration/confidence_entropy": 0.49223803297364005,
|
|
"calibration/coverage@0%": 0.027734375,
|
|
"calibration/coverage@1%": 0.027734375,
|
|
"calibration/coverage@10%": 0.08359375,
|
|
"calibration/coverage@15%": 0.121875,
|
|
"calibration/coverage@20%": 0.1984375,
|
|
"calibration/coverage@25%": 0.38515625,
|
|
"calibration/coverage@30%": 0.4953125,
|
|
"calibration/coverage@5%": 0.05703125,
|
|
"calibration/ece": 0.13973601884101564,
|
|
"calibration/mean_confidence": 0.4944046061589843,
|
|
"calibration/prompt_uniqueness": 0.854248046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 751.2,
|
|
"completions/max_terminated_length": 593.2,
|
|
"completions/mean_length": 174.5083984375,
|
|
"completions/mean_terminated_length": 174.11077270507812,
|
|
"completions/min_length": 81.2,
|
|
"completions/min_terminated_length": 81.2,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.0028364313766360283,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 941237165.0,
|
|
"reward": 0.935793137550354,
|
|
"reward_std": 0.07484155595302582,
|
|
"rewards/accuracy_reward": 0.54541015625,
|
|
"rewards/brier_reward": 0.8021585941314697,
|
|
"rewards/confidence_uniqueness_reward": 0.9650752425193787,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0026960095157846807,
|
|
"rewards/frontier_coverage_0": 0.11437956839799882,
|
|
"rewards/frontier_coverage_1": 0.11437956839799882,
|
|
"rewards/frontier_coverage_10": 0.11248253881931305,
|
|
"rewards/frontier_coverage_15": 0.10827968120574952,
|
|
"rewards/frontier_coverage_20": 0.07057406008243561,
|
|
"rewards/frontier_coverage_25": 0.05096975192427635,
|
|
"rewards/frontier_coverage_5": 0.11267611980438233,
|
|
"rewards/frontier_ece_reward": 0.0022103100549429656,
|
|
"rewards/frontier_entropy_batch_reward": -0.2222293496131897,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.076336669921875,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.166015625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10640045404434204,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0381683349609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0381683349609375,
|
|
"signal/advantage_abs_mean": 0.05686543136835098,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05686543136835098,
|
|
"signal/advantage_pre_scale_std": 0.09319915175437928,
|
|
"signal/advantage_std": 0.09319915175437928,
|
|
"signal/brier_reward/centered_abs_mean": 0.1045459121465683,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85078125,
|
|
"signal/brier_reward/group_std_mean": 0.13514408171176912,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010454590804874897,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010454590804874897,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012861154228448867,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.86171875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.017182295396924018,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012861154275014997,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012861154275014997,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.0016572814434766769,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021444797981530427,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0034413845278322697,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6805997185874732e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6805997185874732e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.15001226961612701,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19234943985939026,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001875153393484652,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001875153393484652,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15001226961612701,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19234943985939026,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001875153393484652,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001875153393484652,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14806943833827974,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18986817002296447,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018508680164813994,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018508680164813994,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13712047040462494,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17578884959220886,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017140058567747473,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017140058567747473,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08517580181360244,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88984375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10908302515745164,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010646975366398691,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010646975366398691,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05108058974146843,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.910546875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06581792756915092,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006385074113495648,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006385074113495648,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14839180409908295,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.867578125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1902903586626053,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001854897616431117,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001854897616431117,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005655341129750013,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.843359375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008241251390427352,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005655340966768563,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005655340966768563,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28620743155479433,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.707421875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35727530121803286,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02862074300646782,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02862074300646782,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34525926666499224,
|
|
"calibration/batch_distribution_entropy": 0.9790175360990441,
|
|
"calibration/batch_entropy_100bins": 0.9482314322026811,
|
|
"calibration/batch_entropy_10bins": 0.9790175360990441,
|
|
"calibration/batch_entropy_50bins": 0.9722607174037909,
|
|
"calibration/batch_uniqueness": 0.9663451804952441,
|
|
"calibration/buffer_distribution_entropy": 0.9988060335454433,
|
|
"calibration/buffer_entropy_100bins": 0.9873172216290733,
|
|
"calibration/buffer_entropy_10bins": 0.9988060335454433,
|
|
"calibration/buffer_entropy_50bins": 0.9947503690244401,
|
|
"calibration/confidence_entropy": 0.49496733877215143,
|
|
"calibration/coverage@0%": 0.020322437622309196,
|
|
"calibration/coverage@1%": 0.020322437622309196,
|
|
"calibration/coverage@10%": 0.10869159735812133,
|
|
"calibration/coverage@15%": 0.23450266022504893,
|
|
"calibration/coverage@20%": 0.3384211411448141,
|
|
"calibration/coverage@25%": 0.4357211656066536,
|
|
"calibration/coverage@30%": 0.4935504831213307,
|
|
"calibration/coverage@5%": 0.06336227984344422,
|
|
"calibration/ece": 0.1643798130198141,
|
|
"calibration/mean_confidence": 0.5136691958170254,
|
|
"calibration/prompt_uniqueness": 0.8665675708084027,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 917.2,
|
|
"completions/max_terminated_length": 593.2,
|
|
"completions/mean_length": 174.08798828125,
|
|
"completions/mean_terminated_length": 173.82220153808595,
|
|
"completions/min_length": 78.2,
|
|
"completions/min_terminated_length": 78.2,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.0006290775490924716,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 958071122.0,
|
|
"reward": 0.9379110097885132,
|
|
"reward_std": 0.07883523255586625,
|
|
"rewards/accuracy_reward": 0.55283203125,
|
|
"rewards/brier_reward": 0.8023535490036011,
|
|
"rewards/confidence_uniqueness_reward": 0.966447937488556,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002613516733981669,
|
|
"rewards/frontier_coverage_0": 0.09809458376839757,
|
|
"rewards/frontier_coverage_1": 0.09809458376839757,
|
|
"rewards/frontier_coverage_10": 0.09699874348007143,
|
|
"rewards/frontier_coverage_15": 0.09137383892666548,
|
|
"rewards/frontier_coverage_20": 0.06403161454945802,
|
|
"rewards/frontier_coverage_25": 0.05141137093305588,
|
|
"rewards/frontier_coverage_5": 0.09733490133658051,
|
|
"rewards/frontier_ece_reward": 0.0027125254506245255,
|
|
"rewards/frontier_entropy_batch_reward": -0.22992810904979705,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.078399658203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.165234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10746027380228043,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0391998291015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0391998291015625,
|
|
"signal/advantage_abs_mean": 0.06045843511819839,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06045843511819839,
|
|
"signal/advantage_pre_scale_std": 0.0974207267165184,
|
|
"signal/advantage_std": 0.0974207267165184,
|
|
"signal/brier_reward/centered_abs_mean": 0.10833604633808136,
|
|
"signal/brier_reward/group_bin_occupancy": 0.858203125,
|
|
"signal/brier_reward/group_std_mean": 0.13938207030296326,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010833604633808136,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010833604633808136,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012309185788035392,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.85,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016317157819867135,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012309186393395066,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012309186393395066,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002327501564286649,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.724609375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003778242599219084,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.909376962634269e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.909376962634269e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14588625729084015,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1870903730392456,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018235782859846949,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018235782859846949,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14588625729084015,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1870903730392456,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018235782859846949,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018235782859846949,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14415820091962814,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18487805426120757,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001801977539435029,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001801977539435029,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13210653364658356,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8671875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16943660378456116,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016513317124918104,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016513317124918104,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08194544017314911,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.884375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10515519231557846,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001024318009149283,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001024318009149283,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.053516195714473726,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.92109375,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06882122904062271,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006689524743705988,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006689524743705988,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14475657939910888,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18564701378345488,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018094572937116028,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018094572937116028,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00575404018163681,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.846875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008270268887281417,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005754040437750518,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005754040437750518,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2830525994300842,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35513145923614503,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028305261209607125,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028305261209607125,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4236405616968358,
|
|
"calibration/batch_distribution_entropy": 0.9818024399334021,
|
|
"calibration/batch_entropy_100bins": 0.9521496186391095,
|
|
"calibration/batch_entropy_10bins": 0.9818024399334021,
|
|
"calibration/batch_entropy_50bins": 0.9757064291499068,
|
|
"calibration/batch_uniqueness": 0.9665863037109375,
|
|
"calibration/buffer_distribution_entropy": 0.9987764022593268,
|
|
"calibration/buffer_entropy_100bins": 0.9868602362750798,
|
|
"calibration/buffer_entropy_10bins": 0.9987764022593268,
|
|
"calibration/buffer_entropy_50bins": 0.9947180018038975,
|
|
"calibration/confidence_entropy": 0.5021496893703811,
|
|
"calibration/coverage@0%": 0.00234375,
|
|
"calibration/coverage@1%": 0.00234375,
|
|
"calibration/coverage@10%": 0.014453125,
|
|
"calibration/coverage@15%": 0.028515625,
|
|
"calibration/coverage@20%": 0.050390625,
|
|
"calibration/coverage@25%": 0.062109375,
|
|
"calibration/coverage@30%": 0.205078125,
|
|
"calibration/coverage@5%": 0.00234375,
|
|
"calibration/ece": 0.13476296875,
|
|
"calibration/mean_confidence": 0.506945,
|
|
"calibration/prompt_uniqueness": 0.864794921875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 853.6,
|
|
"completions/max_terminated_length": 853.6,
|
|
"completions/mean_length": 172.291015625,
|
|
"completions/mean_terminated_length": 172.291015625,
|
|
"completions/min_length": 79.8,
|
|
"completions/min_terminated_length": 79.8,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0007745189359411597,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0001,
|
|
"num_tokens": 974862198.0,
|
|
"reward": 0.9235079884529114,
|
|
"reward_std": 0.07905451804399491,
|
|
"rewards/accuracy_reward": 0.5291015625,
|
|
"rewards/brier_reward": 0.7884802699089051,
|
|
"rewards/confidence_uniqueness_reward": 0.9643653869628906,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0033654853235930205,
|
|
"rewards/frontier_coverage_0": 0.11306976824998856,
|
|
"rewards/frontier_coverage_1": 0.11306976824998856,
|
|
"rewards/frontier_coverage_10": 0.1117068201303482,
|
|
"rewards/frontier_coverage_15": 0.1046798437833786,
|
|
"rewards/frontier_coverage_20": 0.07044542729854583,
|
|
"rewards/frontier_coverage_25": 0.05282995253801346,
|
|
"rewards/frontier_coverage_5": 0.1121548593044281,
|
|
"rewards/frontier_ece_reward": 0.002680363832041621,
|
|
"rewards/frontier_entropy_batch_reward": -0.2502776861190796,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07894287109375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16484375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10790681540966034,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039471435546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.039471435546875,
|
|
"signal/advantage_abs_mean": 0.06104508712887764,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06104508712887764,
|
|
"signal/advantage_pre_scale_std": 0.09825572371482849,
|
|
"signal/advantage_std": 0.09825572371482849,
|
|
"signal/brier_reward/centered_abs_mean": 0.11147891283035279,
|
|
"signal/brier_reward/group_bin_occupancy": 0.84375,
|
|
"signal/brier_reward/group_std_mean": 0.14375323951244354,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011147891730070114,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011147891730070114,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013390088081359863,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.87578125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016832890920341015,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0013390088919550181,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013390088919550181,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029725271509960295,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.69453125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004932621866464615,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7156591133680195e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7156591133680195e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.146697798371315,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.1891067087650299,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018337224144488573,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018337224144488573,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.146697798371315,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.873828125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1891067087650299,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018337224144488573,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018337224144488573,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14494749903678894,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1868603676557541,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001811843877658248,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001811843877658248,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13379482328891754,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17280838787555694,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016724353889003396,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016724353889003396,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0833568200469017,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.88984375,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10767639130353927,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010419602738693356,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010419602738693356,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05601404085755348,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.9171875,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07191484123468399,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007001755409874022,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007001755409874022,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1458159238100052,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.8734375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1879925400018692,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018226990709081293,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018226990709081293,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005948805715888739,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.857421875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008423867449164391,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005948805715888739,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005948805715888739,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2958779692649841,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.729296875,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3708716452121735,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029587796702980996,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029587796702980996,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23491377854573506,
|
|
"calibration/batch_distribution_entropy": 0.9814271545653476,
|
|
"calibration/batch_entropy_100bins": 0.9529836470801094,
|
|
"calibration/batch_entropy_10bins": 0.9814271545653476,
|
|
"calibration/batch_entropy_50bins": 0.9721020286908271,
|
|
"calibration/batch_uniqueness": 0.9633582912444499,
|
|
"calibration/buffer_distribution_entropy": 0.9988420216830608,
|
|
"calibration/buffer_entropy_100bins": 0.9863146646855876,
|
|
"calibration/buffer_entropy_10bins": 0.9988420216830608,
|
|
"calibration/buffer_entropy_50bins": 0.9948149826034557,
|
|
"calibration/confidence_entropy": 0.5013591675015299,
|
|
"calibration/coverage@0%": 0.03478167808219178,
|
|
"calibration/coverage@1%": 0.03478167808219178,
|
|
"calibration/coverage@10%": 0.24629250244618395,
|
|
"calibration/coverage@15%": 0.3827597541585127,
|
|
"calibration/coverage@20%": 0.4863770486790607,
|
|
"calibration/coverage@25%": 0.5665117416829746,
|
|
"calibration/coverage@30%": 0.6513538099315068,
|
|
"calibration/coverage@5%": 0.09147810665362036,
|
|
"calibration/ece": 0.1117759863472358,
|
|
"calibration/mean_confidence": 0.44929093207252935,
|
|
"calibration/prompt_uniqueness": 0.8603180280957335,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 853.6,
|
|
"completions/max_terminated_length": 632.8,
|
|
"completions/mean_length": 170.640625,
|
|
"completions/mean_terminated_length": 170.50676574707032,
|
|
"completions/min_length": 78.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0011778445914387703,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 991584982.0,
|
|
"reward": 0.9295665860176087,
|
|
"reward_std": 0.08568341732025146,
|
|
"rewards/accuracy_reward": 0.54228515625,
|
|
"rewards/brier_reward": 0.7837172031402588,
|
|
"rewards/confidence_uniqueness_reward": 0.9617600560188293,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0024823052808642387,
|
|
"rewards/frontier_coverage_0": 0.10404116213321686,
|
|
"rewards/frontier_coverage_1": 0.10404116213321686,
|
|
"rewards/frontier_coverage_10": 0.10361252054572105,
|
|
"rewards/frontier_coverage_15": 0.09421005547046661,
|
|
"rewards/frontier_coverage_20": 0.06851446852087975,
|
|
"rewards/frontier_coverage_25": 0.04667741134762764,
|
|
"rewards/frontier_coverage_5": 0.10377772152423859,
|
|
"rewards/frontier_ece_reward": 0.001886871492024511,
|
|
"rewards/frontier_entropy_batch_reward": -0.23994633555412292,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.102056884765625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13405922651290894,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0510284423828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0510284423828125,
|
|
"signal/advantage_abs_mean": 0.06657596528530121,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06657596528530121,
|
|
"signal/advantage_pre_scale_std": 0.10576380938291549,
|
|
"signal/advantage_std": 0.10576380938291549,
|
|
"signal/brier_reward/centered_abs_mean": 0.11052304357290268,
|
|
"signal/brier_reward/group_bin_occupancy": 0.847265625,
|
|
"signal/brier_reward/group_std_mean": 0.14318473637104034,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01105230450630188,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01105230450630188,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014990394562482834,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8671875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.019529133662581445,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0014990394469350577,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014990394469350577,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_bin_occupancy": 0.12578125,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018933590967208148,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003138176305219531,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3666988272452728e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3666988272452728e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.17136546075344086,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.21953192353248596,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002142068138346076,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002142068138346076,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17136546075344086,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21953192353248596,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002142068138346076,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002142068138346076,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.169466295838356,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.865625,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.217143777012825,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021183287259191274,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021183287259191274,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15599824488162994,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.86640625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19977592229843139,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019499780144542455,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019499780144542455,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09491551518440247,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.12218321114778519,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011864439584314823,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011864439584314823,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05510400533676148,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.90078125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07128551304340362,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006888000760227441,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006888000760227441,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1706299215555191,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2185954213142395,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00213287400547415,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00213287400547415,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005686651263386011,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8765625,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0076931707561016084,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005686651449650526,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005686651449650526,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28447132706642153,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73984375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3533455073833466,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02844713404774666,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02844713404774666,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.34800169365994993,
|
|
"calibration/batch_distribution_entropy": 0.9812203477810147,
|
|
"calibration/batch_entropy_100bins": 0.9538863455386668,
|
|
"calibration/batch_entropy_10bins": 0.9812203477810147,
|
|
"calibration/batch_entropy_50bins": 0.9751755655335739,
|
|
"calibration/batch_uniqueness": 0.9656280517578125,
|
|
"calibration/buffer_distribution_entropy": 0.9989293186275472,
|
|
"calibration/buffer_entropy_100bins": 0.9856529750536842,
|
|
"calibration/buffer_entropy_10bins": 0.9989293186275472,
|
|
"calibration/buffer_entropy_50bins": 0.9948214021847231,
|
|
"calibration/confidence_entropy": 0.47452948094860803,
|
|
"calibration/coverage@0%": 0.0078125,
|
|
"calibration/coverage@1%": 0.0078125,
|
|
"calibration/coverage@10%": 0.08203125,
|
|
"calibration/coverage@15%": 0.226171875,
|
|
"calibration/coverage@20%": 0.2671875,
|
|
"calibration/coverage@25%": 0.3203125,
|
|
"calibration/coverage@30%": 0.391015625,
|
|
"calibration/coverage@5%": 0.010546875,
|
|
"calibration/ece": 0.16367422836523438,
|
|
"calibration/mean_confidence": 0.5006282908652343,
|
|
"calibration/prompt_uniqueness": 0.855078125,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 744.0,
|
|
"completions/max_terminated_length": 550.0,
|
|
"completions/mean_length": 168.0302734375,
|
|
"completions/mean_terminated_length": 167.89690246582032,
|
|
"completions/min_length": 73.4,
|
|
"completions/min_terminated_length": 73.4,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0010172611800953746,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1008245932.0,
|
|
"reward": 0.9288432955741882,
|
|
"reward_std": 0.07382949590682983,
|
|
"rewards/accuracy_reward": 0.5306640625,
|
|
"rewards/brier_reward": 0.8049243211746215,
|
|
"rewards/confidence_uniqueness_reward": 0.9639307618141174,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0031038288958370685,
|
|
"rewards/frontier_coverage_0": 0.13451858162879943,
|
|
"rewards/frontier_coverage_1": 0.13451858162879943,
|
|
"rewards/frontier_coverage_10": 0.13342588990926743,
|
|
"rewards/frontier_coverage_15": 0.12053216546773911,
|
|
"rewards/frontier_coverage_20": 0.08194337785243988,
|
|
"rewards/frontier_coverage_25": 0.05771302729845047,
|
|
"rewards/frontier_coverage_5": 0.13427408933639526,
|
|
"rewards/frontier_ece_reward": 0.0028995629400014877,
|
|
"rewards/frontier_entropy_batch_reward": -0.23538158535957338,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07486572265625,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10253596603870392,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037432861328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037432861328125,
|
|
"signal/advantage_abs_mean": 0.056790337711572644,
|
|
"signal/advantage_pre_scale_abs_mean": 0.056790337711572644,
|
|
"signal/advantage_pre_scale_std": 0.09253572970628739,
|
|
"signal/advantage_std": 0.09253572970628739,
|
|
"signal/brier_reward/centered_abs_mean": 0.10669752955436707,
|
|
"signal/brier_reward/group_bin_occupancy": 0.825,
|
|
"signal/brier_reward/group_std_mean": 0.13931142389774323,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010669752955436707,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010669752955436707,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013811485469341278,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.859375,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01783113442361355,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001381148537620902,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001381148537620902,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002649222710169852,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.711328125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004259026004001498,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.311528380436357e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.311528380436357e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.1507669657468796,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.848046875,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19579726755619048,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001884587062522769,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001884587062522769,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1507669657468796,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.848046875,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19579726755619048,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001884587062522769,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001884587062522769,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1489147961139679,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.848046875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19344739615917206,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018614350352436303,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018614350352436303,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13582422733306884,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.847265625,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17663869857788086,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001697802823036909,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001697802823036909,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08488290458917618,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11030421555042266,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010610363446176053,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010610363446176053,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05705864131450653,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.923828125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07287163138389588,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007132330210879445,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007132330210879445,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15014611780643464,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.84765625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1949920028448105,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00187682646792382,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00187682646792382,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0062250176444649695,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.844921875,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008787142857909203,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006225017714314163,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006225017714314163,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2811248004436493,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72109375,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3526135325431824,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028112480789422988,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028112480789422988,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.4737306351081816,
|
|
"eval_calibration/batch_distribution_entropy": 0.9045024028911264,
|
|
"eval_calibration/batch_entropy_100bins": 0.6949329777280961,
|
|
"eval_calibration/batch_entropy_10bins": 0.9045024028911264,
|
|
"eval_calibration/batch_entropy_50bins": 0.7689095013084137,
|
|
"eval_calibration/batch_uniqueness": 0.8984375,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9988096055976916,
|
|
"eval_calibration/buffer_entropy_100bins": 0.9849748824561931,
|
|
"eval_calibration/buffer_entropy_10bins": 0.9988096055976916,
|
|
"eval_calibration/buffer_entropy_50bins": 0.9946333864318222,
|
|
"eval_calibration/confidence_entropy": 0.4787274667660325,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.046875,
|
|
"eval_calibration/coverage@15%": 0.046875,
|
|
"eval_calibration/coverage@20%": 0.0546875,
|
|
"eval_calibration/coverage@25%": 0.15625,
|
|
"eval_calibration/coverage@30%": 0.1640625,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.21578124999999998,
|
|
"eval_calibration/mean_confidence": 0.47687499999999994,
|
|
"eval_calibration/prompt_uniqueness": 0.8984375,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 422.0,
|
|
"eval_completions/max_terminated_length": 422.0,
|
|
"eval_completions/mean_length": 173.66567993164062,
|
|
"eval_completions/mean_terminated_length": 173.66567993164062,
|
|
"eval_completions/min_length": 87.0,
|
|
"eval_completions/min_terminated_length": 87.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1008245932.0,
|
|
"eval_reward": 0.7935565859079361,
|
|
"eval_reward_std": 0.2286548987030983,
|
|
"eval_rewards/accuracy_reward": 0.416015625,
|
|
"eval_rewards/brier_reward": 0.799243688583374,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.91015625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.003638996509835124,
|
|
"eval_rewards/frontier_coverage_0": 0.19927702844142914,
|
|
"eval_rewards/frontier_coverage_1": 0.19927702844142914,
|
|
"eval_rewards/frontier_coverage_10": 0.19661162421107292,
|
|
"eval_rewards/frontier_coverage_15": 0.17882990464568138,
|
|
"eval_rewards/frontier_coverage_20": 0.10959831066429615,
|
|
"eval_rewards/frontier_coverage_25": 0.057190462946891785,
|
|
"eval_rewards/frontier_coverage_5": 0.19842347875237465,
|
|
"eval_rewards/frontier_ece_reward": 0.004141724260989577,
|
|
"eval_rewards/frontier_entropy_batch_reward": -1.0,
|
|
"eval_runtime": 21.0746,
|
|
"eval_samples_per_second": 23.725,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4656982421875,
|
|
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49005643278360367,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23284912109375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23284912109375,
|
|
"eval_signal/advantage_abs_mean": 0.21324742957949638,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21324742957949638,
|
|
"eval_signal/advantage_pre_scale_std": 0.22612683847546577,
|
|
"eval_signal/advantage_std": 0.22612683847546577,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.18935201317071915,
|
|
"eval_signal/brier_reward/group_bin_occupancy": 0.90625,
|
|
"eval_signal/brier_reward/group_std_mean": 0.24315428733825684,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01893520262092352,
|
|
"eval_signal/brier_reward/weight": 0.10000000149011612,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01893520262092352,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0336761474609375,
|
|
"eval_signal/confidence_uniqueness_reward/group_bin_occupancy": 0.3203125,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.038827759213745594,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003367614757735282,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003367614757735282,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004632304655387998,
|
|
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.703125,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008442466845735908,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.790380964754149e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.790380964754149e-05,
|
|
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3491540476679802,
|
|
"eval_signal/frontier_coverage_0/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_0/group_std_mean": 0.42496294528245926,
|
|
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004364425898529589,
|
|
"eval_signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004364425898529589,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3491540476679802,
|
|
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.42496294528245926,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004364425898529589,
|
|
"eval_signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004364425898529589,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3441574051976204,
|
|
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4191160574555397,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0043019677978008986,
|
|
"eval_signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0043019677978008986,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.31247151643037796,
|
|
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.38298317044973373,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003905894060153514,
|
|
"eval_signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003905894060153514,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.17517539486289024,
|
|
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9140625,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.22201964259147644,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021896924590691924,
|
|
"eval_signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021896924590691924,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09041432663798332,
|
|
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.953125,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.11431009136140347,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011301790946163237,
|
|
"eval_signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011301790946163237,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.34793277829885483,
|
|
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.42353837192058563,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004349160008132458,
|
|
"eval_signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004349160008132458,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.007637793896719813,
|
|
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.984375,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.009760213550180197,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007637794187758118,
|
|
"eval_signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007637794187758118,
|
|
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
|
|
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_steps_per_second": 0.19,
|
|
"step": 300
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25243171988357693,
|
|
"calibration/batch_distribution_entropy": 0.9739037924606638,
|
|
"calibration/batch_entropy_100bins": 0.9439049482785213,
|
|
"calibration/batch_entropy_10bins": 0.9739037924606638,
|
|
"calibration/batch_entropy_50bins": 0.9694166202887426,
|
|
"calibration/batch_uniqueness": 0.966259765625,
|
|
"calibration/buffer_distribution_entropy": 0.99871030928833,
|
|
"calibration/buffer_entropy_100bins": 0.9844605941497815,
|
|
"calibration/buffer_entropy_10bins": 0.99871030928833,
|
|
"calibration/buffer_entropy_50bins": 0.9945275683891899,
|
|
"calibration/confidence_entropy": 0.5045930767500602,
|
|
"calibration/coverage@0%": 0.035546875,
|
|
"calibration/coverage@1%": 0.035546875,
|
|
"calibration/coverage@10%": 0.274609375,
|
|
"calibration/coverage@15%": 0.432421875,
|
|
"calibration/coverage@20%": 0.501953125,
|
|
"calibration/coverage@25%": 0.563671875,
|
|
"calibration/coverage@30%": 0.607421875,
|
|
"calibration/coverage@5%": 0.09296875,
|
|
"calibration/ece": 0.11844418789062501,
|
|
"calibration/mean_confidence": 0.522810125390625,
|
|
"calibration/prompt_uniqueness": 0.87255859375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1185.4,
|
|
"completions/max_terminated_length": 604.8,
|
|
"completions/mean_length": 172.418359375,
|
|
"completions/mean_terminated_length": 171.7501678466797,
|
|
"completions/min_length": 79.6,
|
|
"completions/min_terminated_length": 79.6,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.001320485258474946,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0019,
|
|
"num_tokens": 1024872616.0,
|
|
"reward": 0.9389370799064636,
|
|
"reward_std": 0.08163964003324509,
|
|
"rewards/accuracy_reward": 0.5552734375,
|
|
"rewards/brier_reward": 0.8005570650100708,
|
|
"rewards/confidence_uniqueness_reward": 0.9648543715476989,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.00284066004678607,
|
|
"rewards/frontier_coverage_0": 0.10627357796765864,
|
|
"rewards/frontier_coverage_1": 0.10627357796765864,
|
|
"rewards/frontier_coverage_10": 0.10575458101229743,
|
|
"rewards/frontier_coverage_15": 0.09721773080527782,
|
|
"rewards/frontier_coverage_20": 0.07075041458010674,
|
|
"rewards/frontier_coverage_25": 0.051219668984413144,
|
|
"rewards/frontier_coverage_5": 0.10608085230924189,
|
|
"rewards/frontier_ece_reward": 0.0025254017557017503,
|
|
"rewards/frontier_entropy_batch_reward": -0.23258313536643982,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08868408203125,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.11722440421581268,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044342041015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044342041015625,
|
|
"signal/advantage_abs_mean": 0.06320648193359375,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06320648193359375,
|
|
"signal/advantage_pre_scale_std": 0.10139600187540054,
|
|
"signal/advantage_std": 0.10139600187540054,
|
|
"signal/brier_reward/centered_abs_mean": 0.1057712584733963,
|
|
"signal/brier_reward/group_bin_occupancy": 0.85,
|
|
"signal/brier_reward/group_std_mean": 0.13689170479774476,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0105771254748106,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0105771254748106,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013170672208070755,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.855078125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.01751931421458721,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001317067281343043,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001317067281343043,
|
|
"signal/format_reward/centered_abs_mean": 0.000909423828125,
|
|
"signal/format_reward/group_bin_occupancy": 0.126171875,
|
|
"signal/format_reward/group_std_mean": 0.002030306123197079,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004547119140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004547119140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002454556990414858,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.70625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004182360181584954,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.068196165258996e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.068196165258996e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.151848965883255,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.868359375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.19617189466953278,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001898112171329558,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001898112171329558,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.151848965883255,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.868359375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19617189466953278,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001898112171329558,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001898112171329558,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1494060769677162,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.866796875,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1930826336145401,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018675760366022587,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018675760366022587,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1364602714776993,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.860546875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1767397940158844,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017057533143088222,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017057533143088222,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08021349385380745,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.875390625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.10482732057571412,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010026687057688832,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010026687057688832,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.0531280666589737,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.922265625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06855799853801728,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006641008774749934,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006641008774749934,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1511296510696411,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86640625,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19528249204158782,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018891207640990616,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018891207640990616,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006002122722566128,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.861328125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008093012310564519,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006002122885547578,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006002122885547578,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28755232095718386,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72265625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3637108564376831,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.02875523306429386,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02875523306429386,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36275381546387697,
|
|
"calibration/batch_distribution_entropy": 0.9818363303055356,
|
|
"calibration/batch_entropy_100bins": 0.9516448277738757,
|
|
"calibration/batch_entropy_10bins": 0.9818363303055356,
|
|
"calibration/batch_entropy_50bins": 0.9751574070879462,
|
|
"calibration/batch_uniqueness": 0.965179443359375,
|
|
"calibration/buffer_distribution_entropy": 0.9985763272881746,
|
|
"calibration/buffer_entropy_100bins": 0.9834172411685647,
|
|
"calibration/buffer_entropy_10bins": 0.9985763272881746,
|
|
"calibration/buffer_entropy_50bins": 0.9944091208256998,
|
|
"calibration/confidence_entropy": 0.5024404900454493,
|
|
"calibration/coverage@0%": 0.00546875,
|
|
"calibration/coverage@1%": 0.00546875,
|
|
"calibration/coverage@10%": 0.019921875,
|
|
"calibration/coverage@15%": 0.05546875,
|
|
"calibration/coverage@20%": 0.108203125,
|
|
"calibration/coverage@25%": 0.294921875,
|
|
"calibration/coverage@30%": 0.43984375,
|
|
"calibration/coverage@5%": 0.00546875,
|
|
"calibration/ece": 0.11440790949765625,
|
|
"calibration/mean_confidence": 0.4765774462054687,
|
|
"calibration/prompt_uniqueness": 0.871875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 780.4,
|
|
"completions/max_terminated_length": 620.6,
|
|
"completions/mean_length": 166.96044921875,
|
|
"completions/mean_terminated_length": 166.827734375,
|
|
"completions/min_length": 77.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0008437388460151851,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 1041710771.0,
|
|
"reward": 0.9289904713630677,
|
|
"reward_std": 0.07655752152204513,
|
|
"rewards/accuracy_reward": 0.5341796875,
|
|
"rewards/brier_reward": 0.8037751078605652,
|
|
"rewards/confidence_uniqueness_reward": 0.9644991874694824,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002967359917238355,
|
|
"rewards/frontier_coverage_0": 0.12212227135896683,
|
|
"rewards/frontier_coverage_1": 0.12212227135896683,
|
|
"rewards/frontier_coverage_10": 0.12049156278371811,
|
|
"rewards/frontier_coverage_15": 0.11204217970371247,
|
|
"rewards/frontier_coverage_20": 0.07388233989477158,
|
|
"rewards/frontier_coverage_25": 0.055122246593236925,
|
|
"rewards/frontier_coverage_5": 0.1214935302734375,
|
|
"rewards/frontier_ece_reward": 0.0031089282827451827,
|
|
"rewards/frontier_entropy_batch_reward": -0.2424273669719696,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0775146484375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1029381737112999,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.7,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03875732421875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03875732421875,
|
|
"signal/advantage_abs_mean": 0.05987899079918861,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05987899079918861,
|
|
"signal/advantage_pre_scale_std": 0.09616845995187759,
|
|
"signal/advantage_std": 0.09616845995187759,
|
|
"signal/brier_reward/centered_abs_mean": 0.10155004113912583,
|
|
"signal/brier_reward/group_bin_occupancy": 0.844140625,
|
|
"signal/brier_reward/group_std_mean": 0.1313195899128914,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010155004076659679,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010155004076659679,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012845552526414394,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.878515625,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.016458464972674846,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0012845552759245039,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012845552759245039,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_bin_occupancy": 0.125390625,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002551899803802371,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7140625,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004267166648060083,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1898749148240314e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1898749148240314e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.14079180657863616,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.18092852234840393,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001759897661395371,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001759897661395371,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14079180657863616,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.865234375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18092852234840393,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001759897661395371,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001759897661395371,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13867213428020478,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17820720970630646,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017334016738459468,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017334016738459468,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12698494046926498,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.85546875,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1636158138513565,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015873117838054896,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015873117838054896,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07213507741689681,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.887890625,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09348605275154113,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009016884723678231,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009016884723678231,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05218314677476883,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.930078125,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0668656125664711,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006522893439978361,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006522893439978361,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13967403918504714,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.86328125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17946992814540863,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0017459255410358309,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017459255410358309,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007050628308206797,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.812109375,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011934582144021988,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007050628308206796,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007050628308206796,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2894311249256134,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.711328125,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36359102725982667,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.028943114355206488,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.028943114355206488,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.28309316461584144,
|
|
"calibration/batch_distribution_entropy": 0.9562440520005147,
|
|
"calibration/batch_entropy_100bins": 0.9321305829954412,
|
|
"calibration/batch_entropy_10bins": 0.9562440520005147,
|
|
"calibration/batch_entropy_50bins": 0.9521604451830256,
|
|
"calibration/batch_uniqueness": 0.9661712646484375,
|
|
"calibration/buffer_distribution_entropy": 0.9986985889836495,
|
|
"calibration/buffer_entropy_100bins": 0.9827654244956366,
|
|
"calibration/buffer_entropy_10bins": 0.9986985889836495,
|
|
"calibration/buffer_entropy_50bins": 0.994458498244382,
|
|
"calibration/confidence_entropy": 0.4959383976784825,
|
|
"calibration/coverage@0%": 0.013671875,
|
|
"calibration/coverage@1%": 0.013671875,
|
|
"calibration/coverage@10%": 0.07421875,
|
|
"calibration/coverage@15%": 0.1591796875,
|
|
"calibration/coverage@20%": 0.3056640625,
|
|
"calibration/coverage@25%": 0.3720703125,
|
|
"calibration/coverage@30%": 0.59765625,
|
|
"calibration/coverage@5%": 0.013671875,
|
|
"calibration/ece": 0.14605446661523436,
|
|
"calibration/mean_confidence": 0.6033933338027344,
|
|
"calibration/prompt_uniqueness": 0.8629150390625,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 391.0,
|
|
"completions/max_terminated_length": 391.0,
|
|
"completions/mean_length": 162.71500396728516,
|
|
"completions/mean_terminated_length": 162.71500396728516,
|
|
"completions/min_length": 73.5,
|
|
"completions/min_terminated_length": 73.5,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1048384942.0,
|
|
"reward": 0.9344164729118347,
|
|
"reward_std": 0.08154623582959175,
|
|
"rewards/accuracy_reward": 0.558837890625,
|
|
"rewards/brier_reward": 0.7741440236568451,
|
|
"rewards/confidence_uniqueness_reward": 0.9663105010986328,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0030248835682868958,
|
|
"rewards/frontier_coverage_0": 0.06300802156329155,
|
|
"rewards/frontier_coverage_1": 0.06300802156329155,
|
|
"rewards/frontier_coverage_10": 0.062442582100629807,
|
|
"rewards/frontier_coverage_15": 0.055601296946406364,
|
|
"rewards/frontier_coverage_20": 0.03809378854930401,
|
|
"rewards/frontier_coverage_25": 0.03736502677202225,
|
|
"rewards/frontier_coverage_5": 0.06258464232087135,
|
|
"rewards/frontier_ece_reward": 0.0019081256468780339,
|
|
"rewards/frontier_entropy_batch_reward": -0.23977214097976685,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0806732177734375,
|
|
"signal/accuracy_reward/group_bin_occupancy": 0.1630859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1065446101129055,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6953125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04033660888671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04033660888671875,
|
|
"signal/advantage_abs_mean": 0.06426878273487091,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06426878273487091,
|
|
"signal/advantage_pre_scale_std": 0.10264430195093155,
|
|
"signal/advantage_std": 0.10264430195093155,
|
|
"signal/brier_reward/centered_abs_mean": 0.11038177087903023,
|
|
"signal/brier_reward/group_bin_occupancy": 0.8701171875,
|
|
"signal/brier_reward/group_std_mean": 0.14014140516519547,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011038177646696568,
|
|
"signal/brier_reward/weight": 0.10000000149011612,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011038177646696568,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011780858039855957,
|
|
"signal/confidence_uniqueness_reward/group_bin_occupancy": 0.8720703125,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.015235808677971363,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0011780858621932566,
|
|
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011780858621932566,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_bin_occupancy": 0.125,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002542344154790044,
|
|
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7236328125,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0040895091369748116,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1779301025380846e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.012500000186264515,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1779301025380846e-05,
|
|
"signal/frontier_coverage_0/centered_abs_mean": 0.13523942232131958,
|
|
"signal/frontier_coverage_0/group_bin_occupancy": 0.8798828125,
|
|
"signal/frontier_coverage_0/group_std_mean": 0.17214351892471313,
|
|
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0016904928488656878,
|
|
"signal/frontier_coverage_0/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0016904928488656878,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13523942232131958,
|
|
"signal/frontier_coverage_1/group_bin_occupancy": 0.8798828125,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17214351892471313,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0016904928488656878,
|
|
"signal/frontier_coverage_1/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0016904928488656878,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13328810781240463,
|
|
"signal/frontier_coverage_10/group_bin_occupancy": 0.8740234375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16966666281223297,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016661013942211866,
|
|
"signal/frontier_coverage_10/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016661013942211866,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1213008388876915,
|
|
"signal/frontier_coverage_15/group_bin_occupancy": 0.8662109375,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15479815006256104,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001516260497737676,
|
|
"signal/frontier_coverage_15/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001516260497737676,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.065843116492033,
|
|
"signal/frontier_coverage_20/group_bin_occupancy": 0.8955078125,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.08446861431002617,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0008230389503296465,
|
|
"signal/frontier_coverage_20/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008230389503296465,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.04858388379216194,
|
|
"signal/frontier_coverage_25/group_bin_occupancy": 0.91015625,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06284799799323082,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0006072985415812582,
|
|
"signal/frontier_coverage_25/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006072985415812582,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1348385065793991,
|
|
"signal/frontier_coverage_5/group_bin_occupancy": 0.876953125,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17164986580610275,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001685481343884021,
|
|
"signal/frontier_coverage_5/weight": 0.012500000186264515,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001685481343884021,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006206750171259046,
|
|
"signal/frontier_ece_reward/group_bin_occupancy": 0.8408203125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009092409629374743,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006206750113051385,
|
|
"signal/frontier_ece_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006206750113051385,
|
|
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30052025616168976,
|
|
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7265625,
|
|
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37405627965927124,
|
|
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03005202580243349,
|
|
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
|
|
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03005202580243349,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.004423426932846315,
|
|
"train_runtime": 60056.3002,
|
|
"train_samples_per_second": 0.333,
|
|
"train_steps_per_second": 0.005
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1048384942,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|