Model: hector-gr/RLCR-v4-ks-uniqueness-hotpot-aliases-qwen35-balanced-fullnode-ga32 Source: Original Platform
9284 lines
572 KiB
JSON
9284 lines
572 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984,
|
|
"eval_steps": 50,
|
|
"global_step": 312,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"calibration/aurc": 0.574161369417126,
|
|
"calibration/batch_distribution_entropy": 0.6217632380850391,
|
|
"calibration/confidence_entropy": 0.3449140549111297,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.07282051282051281,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.47335296160366747,
|
|
"calibration/mean_confidence": 0.8051669529651726,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0361328125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1501.2,
|
|
"completions/mean_length": 268.3080078125,
|
|
"completions/mean_terminated_length": 220.78201293945312,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.016,
|
|
"grad_norm": 0.11463230848312378,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"loss": 0.0912,
|
|
"num_tokens": 17591506.0,
|
|
"reward": 0.6744200468063355,
|
|
"reward_std": 0.49649240970611574,
|
|
"rewards/accuracy_reward": 0.26630859375,
|
|
"rewards/brier_reward": 0.4115479052066803,
|
|
"rewards/confidence_uniqueness_reward": 0.4812490105628967,
|
|
"rewards/format_reward": 0.68798828125,
|
|
"rewards/frontier_aurc_reward": 0.3422773241996765,
|
|
"rewards/frontier_coverage_1": 0.3422773241996765,
|
|
"rewards/frontier_coverage_10": 0.3422773241996765,
|
|
"rewards/frontier_coverage_15": 0.3422773241996765,
|
|
"rewards/frontier_coverage_20": 0.3422773241996765,
|
|
"rewards/frontier_coverage_25": 0.3422773241996765,
|
|
"rewards/frontier_coverage_5": 0.3422773241996765,
|
|
"rewards/frontier_ece_reward": 0.3422773241996765,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.274066162109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.31360672116279603,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.26875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1370330810546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1370330810546875,
|
|
"signal/advantage_abs_mean": 0.42605471014976504,
|
|
"signal/advantage_pre_scale_abs_mean": 0.42605471014976504,
|
|
"signal/advantage_pre_scale_std": 0.5046224594116211,
|
|
"signal/advantage_std": 0.5046224594116211,
|
|
"signal/brier_reward/centered_abs_mean": 0.33465067148208616,
|
|
"signal/brier_reward/group_std_mean": 0.3789239704608917,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04183133393526077,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.04183133393526077,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2928457796573639,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.34470821022987364,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03660572245717049,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03660572245717049,
|
|
"signal/format_reward/centered_abs_mean": 0.394317626953125,
|
|
"signal/format_reward/group_std_mean": 0.4479940414428711,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1971588134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1971588134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.31442518830299376,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3627101004123688,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.31442518830299376,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3627101004123688,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.31442518830299376,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3627101004123688,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.31442518830299376,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3627101004123688,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.31442518830299376,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3627101004123688,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.31442518830299376,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3627101004123688,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.31442518830299376,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3627101004123688,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005628210585564375,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.31442518830299376,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3627101004123688,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03930314853787422,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03930314853787422,
|
|
"step": 5
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6252603742483307,
|
|
"calibration/batch_distribution_entropy": 0.6553918903963509,
|
|
"calibration/confidence_entropy": 0.3561837908236652,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.4639029824341743,
|
|
"calibration/mean_confidence": 0.7835663529333339,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.03310546875,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1525.8,
|
|
"completions/mean_length": 253.1953125,
|
|
"completions/mean_terminated_length": 209.34018249511718,
|
|
"completions/min_length": 2.0,
|
|
"completions/min_terminated_length": 2.0,
|
|
"epoch": 0.032,
|
|
"grad_norm": 0.038865186274051666,
|
|
"learning_rate": 6.249999999999999e-07,
|
|
"loss": 0.0913,
|
|
"num_tokens": 35284578.0,
|
|
"reward": 0.7065129756927491,
|
|
"reward_std": 0.4615809082984924,
|
|
"rewards/accuracy_reward": 0.25361328125,
|
|
"rewards/brier_reward": 0.4207825243473053,
|
|
"rewards/confidence_uniqueness_reward": 0.5312160611152649,
|
|
"rewards/format_reward": 0.75029296875,
|
|
"rewards/frontier_aurc_reward": 0.3418300449848175,
|
|
"rewards/frontier_coverage_1": 0.3418300449848175,
|
|
"rewards/frontier_coverage_10": 0.3418300449848175,
|
|
"rewards/frontier_coverage_15": 0.3418300449848175,
|
|
"rewards/frontier_coverage_20": 0.3418300449848175,
|
|
"rewards/frontier_coverage_25": 0.3418300449848175,
|
|
"rewards/frontier_coverage_5": 0.3418300449848175,
|
|
"rewards/frontier_ece_reward": 0.3418300449848175,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.252484130859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.3011133372783661,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.259375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1262420654296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1262420654296875,
|
|
"signal/advantage_abs_mean": 0.37834325432777405,
|
|
"signal/advantage_pre_scale_abs_mean": 0.37834325432777405,
|
|
"signal/advantage_pre_scale_std": 0.470405113697052,
|
|
"signal/advantage_std": 0.470405113697052,
|
|
"signal/brier_reward/centered_abs_mean": 0.3180624425411224,
|
|
"signal/brier_reward/group_std_mean": 0.36653814315795896,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0397578053176403,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0397578053176403,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.26791125535964966,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.3284755825996399,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03348890691995621,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03348890691995621,
|
|
"signal/format_reward/centered_abs_mean": 0.348736572265625,
|
|
"signal/format_reward/group_std_mean": 0.4197552680969238,
|
|
"signal/format_reward/group_zero_std_frac": 0.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1743682861328125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.1743682861328125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2994271457195282,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.35230074524879457,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2994271457195282,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.35230074524879457,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2994271457195282,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.35230074524879457,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2994271457195282,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.35230074524879457,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2994271457195282,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.35230074524879457,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2994271457195282,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.35230074524879457,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2994271457195282,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.35230074524879457,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00535974558442831,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2994271457195282,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.35230074524879457,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03742839321494103,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03742839321494103,
|
|
"step": 10
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5063660563094874,
|
|
"calibration/batch_distribution_entropy": 0.6456137890333957,
|
|
"calibration/confidence_entropy": 0.3523480270306522,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.0,
|
|
"calibration/coverage@30%": 0.0,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.40836266399469745,
|
|
"calibration/mean_confidence": 0.803510317425796,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0111328125,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 1402.4,
|
|
"completions/mean_length": 187.18408203125,
|
|
"completions/mean_terminated_length": 172.0759704589844,
|
|
"completions/min_length": 8.8,
|
|
"completions/min_terminated_length": 8.8,
|
|
"epoch": 0.048,
|
|
"grad_norm": 0.3801707625389099,
|
|
"learning_rate": 9.374999999999999e-07,
|
|
"loss": 0.0454,
|
|
"num_tokens": 52250079.0,
|
|
"reward": 0.874800705909729,
|
|
"reward_std": 0.349214905500412,
|
|
"rewards/accuracy_reward": 0.3330078125,
|
|
"rewards/brier_reward": 0.5289363861083984,
|
|
"rewards/confidence_uniqueness_reward": 0.6511791348457336,
|
|
"rewards/format_reward": 0.90400390625,
|
|
"rewards/frontier_aurc_reward": 0.434600293636322,
|
|
"rewards/frontier_coverage_1": 0.434600293636322,
|
|
"rewards/frontier_coverage_10": 0.434600293636322,
|
|
"rewards/frontier_coverage_15": 0.434600293636322,
|
|
"rewards/frontier_coverage_20": 0.434600293636322,
|
|
"rewards/frontier_coverage_25": 0.434600293636322,
|
|
"rewards/frontier_coverage_5": 0.434600293636322,
|
|
"rewards/frontier_ece_reward": 0.434600293636322,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.21435546875,
|
|
"signal/accuracy_reward/group_std_mean": 0.2680795192718506,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.3,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.107177734375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.107177734375,
|
|
"signal/advantage_abs_mean": 0.2617394238710403,
|
|
"signal/advantage_pre_scale_abs_mean": 0.2617394238710403,
|
|
"signal/advantage_pre_scale_std": 0.3631041467189789,
|
|
"signal/advantage_std": 0.3631041467189789,
|
|
"signal/brier_reward/centered_abs_mean": 0.26632643938064576,
|
|
"signal/brier_reward/group_std_mean": 0.32310367822647096,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03329080492258072,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.03329080492258072,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18657545149326324,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.24681947529315948,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023321931436657905,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023321931436657905,
|
|
"signal/format_reward/centered_abs_mean": 0.161785888671875,
|
|
"signal/format_reward/group_std_mean": 0.25919924676418304,
|
|
"signal/format_reward/group_zero_std_frac": 0.125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0808929443359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0808929443359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.26505063772201537,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3231291711330414,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.26505063772201537,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3231291711330414,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.26505063772201537,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3231291711330414,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.26505063772201537,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3231291711330414,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.26505063772201537,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3231291711330414,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.26505063772201537,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3231291711330414,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.26505063772201537,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3231291711330414,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0047444062307477,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.26505063772201537,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3231291711330414,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03313132971525192,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03313132971525192,
|
|
"step": 15
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4569322433246872,
|
|
"calibration/batch_distribution_entropy": 0.701901124719379,
|
|
"calibration/confidence_entropy": 0.368413807477986,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.14661354581673308,
|
|
"calibration/coverage@30%": 0.26693227091633465,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.31210808530583545,
|
|
"calibration/mean_confidence": 0.7689342707600968,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00302734375,
|
|
"completions/max_length": 1536.0,
|
|
"completions/max_terminated_length": 980.0,
|
|
"completions/mean_length": 133.61025390625,
|
|
"completions/mean_terminated_length": 129.3588897705078,
|
|
"completions/min_length": 26.4,
|
|
"completions/min_terminated_length": 26.4,
|
|
"epoch": 0.064,
|
|
"grad_norm": 0.03475378826260567,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0134,
|
|
"num_tokens": 68536648.0,
|
|
"reward": 0.9799639225006104,
|
|
"reward_std": 0.26246568858623504,
|
|
"rewards/accuracy_reward": 0.39072265625,
|
|
"rewards/brier_reward": 0.6048341035842896,
|
|
"rewards/confidence_uniqueness_reward": 0.738007652759552,
|
|
"rewards/format_reward": 0.98427734375,
|
|
"rewards/frontier_aurc_reward": 0.4978376030921936,
|
|
"rewards/frontier_coverage_1": 0.4978376030921936,
|
|
"rewards/frontier_coverage_10": 0.4978376030921936,
|
|
"rewards/frontier_coverage_15": 0.4978376030921936,
|
|
"rewards/frontier_coverage_20": 0.4978376030921936,
|
|
"rewards/frontier_coverage_25": 0.4978376030921936,
|
|
"rewards/frontier_coverage_5": 0.4978376030921936,
|
|
"rewards/frontier_ece_reward": 0.4978376030921936,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.214776611328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.2681283473968506,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.296875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1073883056640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1073883056640625,
|
|
"signal/advantage_abs_mean": 0.20265749394893645,
|
|
"signal/advantage_pre_scale_abs_mean": 0.20265749394893645,
|
|
"signal/advantage_pre_scale_std": 0.2796101540327072,
|
|
"signal/advantage_std": 0.2796101540327072,
|
|
"signal/brier_reward/centered_abs_mean": 0.23721030354499817,
|
|
"signal/brier_reward/group_std_mean": 0.2935959815979004,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02965128794312477,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02965128794312477,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12453396171331406,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.15769868493080139,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015566745214164257,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015566745214164257,
|
|
"signal/format_reward/centered_abs_mean": 0.029925537109375,
|
|
"signal/format_reward/group_std_mean": 0.07638685405254364,
|
|
"signal/format_reward/group_zero_std_frac": 0.609375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0149627685546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0149627685546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.2561593323945999,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.3122838795185089,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2561593323945999,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3122838795185089,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2561593323945999,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3122838795185089,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2561593323945999,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3122838795185089,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2561593323945999,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3122838795185089,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2561593323945999,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3122838795185089,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2561593323945999,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3122838795185089,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0045852516777813435,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.2561593323945999,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.3122838795185089,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03201991654932499,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03201991654932499,
|
|
"step": 20
|
|
},
|
|
{
|
|
"calibration/aurc": 0.5084255341898481,
|
|
"calibration/batch_distribution_entropy": 0.8164247601318154,
|
|
"calibration/buffer_distribution_entropy": 0.7090379807345808,
|
|
"calibration/confidence_entropy": 0.45600894852795226,
|
|
"calibration/coverage@0%": 0.0,
|
|
"calibration/coverage@1%": 0.0,
|
|
"calibration/coverage@10%": 0.0,
|
|
"calibration/coverage@15%": 0.0,
|
|
"calibration/coverage@20%": 0.0,
|
|
"calibration/coverage@25%": 0.029133858267716535,
|
|
"calibration/coverage@30%": 0.07401574803149606,
|
|
"calibration/coverage@5%": 0.0,
|
|
"calibration/ece": 0.3110133148570779,
|
|
"calibration/mean_confidence": 0.6897827998864032,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00078125,
|
|
"completions/max_length": 1409.6,
|
|
"completions/max_terminated_length": 799.0,
|
|
"completions/mean_length": 118.04287109375,
|
|
"completions/mean_terminated_length": 116.93400421142579,
|
|
"completions/min_length": 38.8,
|
|
"completions/min_terminated_length": 38.8,
|
|
"epoch": 0.08,
|
|
"grad_norm": 0.00825721025466919,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0023,
|
|
"num_tokens": 84678559.0,
|
|
"reward": 0.9916581392288208,
|
|
"reward_std": 0.19244107306003572,
|
|
"rewards/accuracy_reward": 0.4255859375,
|
|
"rewards/brier_reward": 0.6843536019325256,
|
|
"rewards/confidence_uniqueness_reward": 0.817476212978363,
|
|
"rewards/format_reward": 0.996484375,
|
|
"rewards/frontier_aurc_reward": 0.3578193149529397,
|
|
"rewards/frontier_coverage_1": 0.3889893189072609,
|
|
"rewards/frontier_coverage_10": 0.3889893189072609,
|
|
"rewards/frontier_coverage_15": 0.3889893189072609,
|
|
"rewards/frontier_coverage_20": 0.3889893189072609,
|
|
"rewards/frontier_coverage_25": 0.3889893189072609,
|
|
"rewards/frontier_coverage_5": 0.3889893189072609,
|
|
"rewards/frontier_ece_reward": 0.3576948957517743,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.18526611328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.23501957356929778,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.36875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.092633056640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.092633056640625,
|
|
"signal/advantage_abs_mean": 0.1510450452566147,
|
|
"signal/advantage_pre_scale_abs_mean": 0.1510450452566147,
|
|
"signal/advantage_pre_scale_std": 0.21218505203723909,
|
|
"signal/advantage_std": 0.21218505203723909,
|
|
"signal/brier_reward/centered_abs_mean": 0.2015215128660202,
|
|
"signal/brier_reward/group_std_mean": 0.25240307450294497,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025190189108252527,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.025190189108252527,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08064173310995101,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10429088771343231,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010080216638743877,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010080216638743877,
|
|
"signal/format_reward/centered_abs_mean": 0.00677490234375,
|
|
"signal/format_reward/group_std_mean": 0.01887845266610384,
|
|
"signal/format_reward/group_zero_std_frac": 0.896875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003387451171875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.003387451171875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.1544154985807836,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.18873186707496642,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.002764037343149539,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.002764037343149539,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.21573287844657899,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.27374354004859924,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.21573287844657899,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.27374354004859924,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.21573287844657899,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.27374354004859924,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.21573287844657899,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.27374354004859924,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.21573287844657899,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.27374354004859924,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.21573287844657899,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.27374354004859924,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038616183679550885,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.1824594885110855,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.22269095629453659,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02280743606388569,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02280743606388569,
|
|
"step": 25
|
|
},
|
|
{
|
|
"calibration/aurc": 0.6451447340341319,
|
|
"calibration/batch_distribution_entropy": 0.8836254438118136,
|
|
"calibration/buffer_distribution_entropy": 0.7536336258740739,
|
|
"calibration/confidence_entropy": 0.5477458249391652,
|
|
"calibration/coverage@0%": 0.00234375,
|
|
"calibration/coverage@1%": 0.00234375,
|
|
"calibration/coverage@10%": 0.00234375,
|
|
"calibration/coverage@15%": 0.00234375,
|
|
"calibration/coverage@20%": 0.00234375,
|
|
"calibration/coverage@25%": 0.003125,
|
|
"calibration/coverage@30%": 0.003125,
|
|
"calibration/coverage@5%": 0.00234375,
|
|
"calibration/ece": 0.2766606577437428,
|
|
"calibration/mean_confidence": 0.5350011617572232,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 864.6,
|
|
"completions/max_terminated_length": 464.4,
|
|
"completions/mean_length": 126.6078125,
|
|
"completions/mean_terminated_length": 126.33264770507813,
|
|
"completions/min_length": 41.4,
|
|
"completions/min_terminated_length": 41.4,
|
|
"epoch": 0.096,
|
|
"grad_norm": 0.007818322628736496,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0003,
|
|
"num_tokens": 101019631.0,
|
|
"reward": 0.9390641927719117,
|
|
"reward_std": 0.1480298787355423,
|
|
"rewards/accuracy_reward": 0.4728515625,
|
|
"rewards/brier_reward": 0.7311343312263489,
|
|
"rewards/confidence_uniqueness_reward": 0.8339264154434204,
|
|
"rewards/format_reward": 0.99755859375,
|
|
"rewards/frontier_aurc_reward": -0.004495029617100954,
|
|
"rewards/frontier_coverage_1": 0.06669748276472091,
|
|
"rewards/frontier_coverage_10": 0.06669748276472091,
|
|
"rewards/frontier_coverage_15": 0.06669748276472091,
|
|
"rewards/frontier_coverage_20": 0.06669748276472091,
|
|
"rewards/frontier_coverage_25": 0.06669748276472091,
|
|
"rewards/frontier_coverage_5": 0.06669748276472091,
|
|
"rewards/frontier_ece_reward": 0.009149301517754792,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.17642822265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.22929745614528657,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.088214111328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.088214111328125,
|
|
"signal/advantage_abs_mean": 0.11449322551488876,
|
|
"signal/advantage_pre_scale_abs_mean": 0.11449322551488876,
|
|
"signal/advantage_pre_scale_std": 0.16401045322418212,
|
|
"signal/advantage_std": 0.16401045322418212,
|
|
"signal/brier_reward/centered_abs_mean": 0.18242722153663635,
|
|
"signal/brier_reward/group_std_mean": 0.23005988895893098,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022803402692079543,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.022803402692079543,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08953404575586318,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.1142925649881363,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011191755719482898,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011191755719482898,
|
|
"signal/format_reward/centered_abs_mean": 0.004730224609375,
|
|
"signal/format_reward/group_std_mean": 0.013810678757727146,
|
|
"signal/format_reward/group_zero_std_frac": 0.921875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0023651123046875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0023651123046875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024468526942655446,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003993393434211612,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.379866222734563e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.379866222734563e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1959122210741043,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26175145506858827,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1959122210741043,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26175145506858827,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1959122210741043,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26175145506858827,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1959122210741043,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26175145506858827,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1959122210741043,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26175145506858827,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1959122210741043,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26175145506858827,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003506828611716628,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.06938310116529464,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.08558708280324936,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00867288764566183,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00867288764566183,
|
|
"step": 30
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3086294818860637,
|
|
"calibration/batch_distribution_entropy": 0.8961884358507668,
|
|
"calibration/buffer_distribution_entropy": 0.8133524081442107,
|
|
"calibration/confidence_entropy": 0.5430245461216926,
|
|
"calibration/coverage@0%": 0.009375,
|
|
"calibration/coverage@1%": 0.009375,
|
|
"calibration/coverage@10%": 0.06875,
|
|
"calibration/coverage@15%": 0.10546875,
|
|
"calibration/coverage@20%": 0.19140625,
|
|
"calibration/coverage@25%": 0.37265625,
|
|
"calibration/coverage@30%": 0.53671875,
|
|
"calibration/coverage@5%": 0.025,
|
|
"calibration/ece": 0.15086028370235913,
|
|
"calibration/mean_confidence": 0.46044110662133236,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1082.4,
|
|
"completions/max_terminated_length": 397.4,
|
|
"completions/mean_length": 135.834375,
|
|
"completions/mean_terminated_length": 135.4240753173828,
|
|
"completions/min_length": 49.2,
|
|
"completions/min_terminated_length": 49.2,
|
|
"epoch": 0.112,
|
|
"grad_norm": 0.004440602846443653,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 117520047.0,
|
|
"reward": 0.9525650858879089,
|
|
"reward_std": 0.12223374545574188,
|
|
"rewards/accuracy_reward": 0.48525390625,
|
|
"rewards/brier_reward": 0.7479133129119873,
|
|
"rewards/confidence_uniqueness_reward": 0.8512405276298523,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.003925839858129621,
|
|
"rewards/frontier_coverage_1": 0.08381552398204803,
|
|
"rewards/frontier_coverage_10": 0.08381552398204803,
|
|
"rewards/frontier_coverage_15": 0.08381552398204803,
|
|
"rewards/frontier_coverage_20": 0.08381552398204803,
|
|
"rewards/frontier_coverage_25": 0.08381552398204803,
|
|
"rewards/frontier_coverage_5": 0.08381552398204803,
|
|
"rewards/frontier_ece_reward": 0.012414590083062648,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.167669677734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.21841561794281006,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.390625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0838348388671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0838348388671875,
|
|
"signal/advantage_abs_mean": 0.09548963457345963,
|
|
"signal/advantage_pre_scale_abs_mean": 0.09548963457345963,
|
|
"signal/advantage_pre_scale_std": 0.13613282144069672,
|
|
"signal/advantage_std": 0.13613282144069672,
|
|
"signal/brier_reward/centered_abs_mean": 0.1708603948354721,
|
|
"signal/brier_reward/group_std_mean": 0.21544656455516814,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021357549354434013,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021357549354434013,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08106829673051834,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.09855363517999649,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010133537091314792,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010133537091314792,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_std_mean": 0.004971844423562288,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016749128932133316,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002730554435402155,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.998093877977226e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.998093877977226e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.24808040857315064,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3155758440494537,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.24808040857315064,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3155758440494537,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.24808040857315064,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3155758440494537,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.24808040857315064,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3155758440494537,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.24808040857315064,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3155758440494537,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.24808040857315064,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3155758440494537,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044406389351934195,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04592524915933609,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.06059465631842613,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005740656144917011,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005740656144917011,
|
|
"step": 35
|
|
},
|
|
{
|
|
"calibration/aurc": 0.4580176944367551,
|
|
"calibration/batch_distribution_entropy": 0.8535035360352362,
|
|
"calibration/buffer_distribution_entropy": 0.8659183215624904,
|
|
"calibration/confidence_entropy": 0.519083178935934,
|
|
"calibration/coverage@0%": 0.007815563725490197,
|
|
"calibration/coverage@1%": 0.007815563725490197,
|
|
"calibration/coverage@10%": 0.03203431372549019,
|
|
"calibration/coverage@15%": 0.06328431372549019,
|
|
"calibration/coverage@20%": 0.11484681372549019,
|
|
"calibration/coverage@25%": 0.1296905637254902,
|
|
"calibration/coverage@30%": 0.2289093137254902,
|
|
"calibration/coverage@5%": 0.007815563725490197,
|
|
"calibration/ece": 0.1627333490117647,
|
|
"calibration/mean_confidence": 0.3436108605470588,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 912.4,
|
|
"completions/max_terminated_length": 448.8,
|
|
"completions/mean_length": 149.380859375,
|
|
"completions/mean_terminated_length": 149.11015625,
|
|
"completions/min_length": 65.4,
|
|
"completions/min_terminated_length": 65.4,
|
|
"epoch": 0.128,
|
|
"grad_norm": 0.003327795770019293,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 133966379.0,
|
|
"reward": 0.9490593194961547,
|
|
"reward_std": 0.1036192610859871,
|
|
"rewards/accuracy_reward": 0.47001953125,
|
|
"rewards/brier_reward": 0.7549473881721497,
|
|
"rewards/confidence_uniqueness_reward": 0.8515770912170411,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0035853940062224865,
|
|
"rewards/frontier_coverage_1": 0.11151133924722671,
|
|
"rewards/frontier_coverage_10": 0.11151133924722671,
|
|
"rewards/frontier_coverage_15": 0.11151133924722671,
|
|
"rewards/frontier_coverage_20": 0.11151133924722671,
|
|
"rewards/frontier_coverage_25": 0.11151133924722671,
|
|
"rewards/frontier_coverage_5": 0.11151133924722671,
|
|
"rewards/frontier_ece_reward": 0.013309185951948166,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.142132568359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.19049813449382783,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0710662841796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0710662841796875,
|
|
"signal/advantage_abs_mean": 0.08017075657844544,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08017075657844544,
|
|
"signal/advantage_pre_scale_std": 0.11746061593294144,
|
|
"signal/advantage_std": 0.11746061593294144,
|
|
"signal/brier_reward/centered_abs_mean": 0.15917613804340364,
|
|
"signal/brier_reward/group_std_mean": 0.20062560141086577,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019897017255425455,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019897017255425455,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08653065264225006,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.11002808213233947,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010816331580281257,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010816331580281257,
|
|
"signal/format_reward/centered_abs_mean": 0.001324462890625,
|
|
"signal/format_reward/group_std_mean": 0.0038669900968670845,
|
|
"signal/format_reward/group_zero_std_frac": 0.978125,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012066281167790293,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0019432639004662634,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1598641978926025e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1598641978926025e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2550701230764389,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3213431596755981,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2550701230764389,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3213431596755981,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2550701230764389,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3213431596755981,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2550701230764389,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3213431596755981,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2550701230764389,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3213431596755981,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2550701230764389,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3213431596755981,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004565754998475313,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03292861394584179,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04621725678443909,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004116076743230224,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004116076743230224,
|
|
"step": 40
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2264348749109058,
|
|
"calibration/batch_distribution_entropy": 0.947362197850653,
|
|
"calibration/buffer_distribution_entropy": 0.9023169704024638,
|
|
"calibration/confidence_entropy": 0.5164703875793273,
|
|
"calibration/coverage@0%": 0.0375,
|
|
"calibration/coverage@1%": 0.0375,
|
|
"calibration/coverage@10%": 0.14453125,
|
|
"calibration/coverage@15%": 0.29140625,
|
|
"calibration/coverage@20%": 0.46796875,
|
|
"calibration/coverage@25%": 0.7109375,
|
|
"calibration/coverage@30%": 0.76171875,
|
|
"calibration/coverage@5%": 0.06953125,
|
|
"calibration/ece": 0.293532578125,
|
|
"calibration/mean_confidence": 0.41213460937499996,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1085.0,
|
|
"completions/max_terminated_length": 383.8,
|
|
"completions/mean_length": 155.62353515625,
|
|
"completions/mean_terminated_length": 155.21868896484375,
|
|
"completions/min_length": 67.2,
|
|
"completions/min_terminated_length": 67.2,
|
|
"epoch": 0.144,
|
|
"grad_norm": 0.0031078618485480547,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 150510396.0,
|
|
"reward": 1.000108528137207,
|
|
"reward_std": 0.10603977590799332,
|
|
"rewards/accuracy_reward": 0.58642578125,
|
|
"rewards/brier_reward": 0.7386995673179626,
|
|
"rewards/confidence_uniqueness_reward": 0.8767276644706726,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0029010240454226733,
|
|
"rewards/frontier_coverage_1": 0.026493354281410576,
|
|
"rewards/frontier_coverage_10": 0.026493354281410576,
|
|
"rewards/frontier_coverage_15": 0.026493354281410576,
|
|
"rewards/frontier_coverage_20": 0.026493354281410576,
|
|
"rewards/frontier_coverage_25": 0.026493354281410576,
|
|
"rewards/frontier_coverage_5": 0.026493354281410576,
|
|
"rewards/frontier_ece_reward": 0.018952517956495284,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.150006103515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.19647997319698335,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0750030517578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0750030517578125,
|
|
"signal/advantage_abs_mean": 0.08291901051998138,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08291901051998138,
|
|
"signal/advantage_pre_scale_std": 0.11942969560623169,
|
|
"signal/advantage_std": 0.11942969560623169,
|
|
"signal/brier_reward/centered_abs_mean": 0.17461107671260834,
|
|
"signal/brier_reward/group_std_mean": 0.21828512847423553,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021826384589076042,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021826384589076042,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06542369574308396,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08328969031572342,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008177961967885495,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008177961967885495,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014980694744735957,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023212187923491003,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6815443561645225e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6815443561645225e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2612448215484619,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.3272443234920502,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2612448215484619,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.3272443234920502,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2612448215484619,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.3272443234920502,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2612448215484619,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.3272443234920502,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2612448215484619,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.3272443234920502,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2612448215484619,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.3272443234920502,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004676282219588756,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.039357250183820726,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05260428786277771,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004919656272977591,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004919656272977591,
|
|
"step": 45
|
|
},
|
|
{
|
|
"calibration/aurc": 0.33544708568653764,
|
|
"calibration/batch_distribution_entropy": 0.9641692497219696,
|
|
"calibration/buffer_distribution_entropy": 0.9233838203150402,
|
|
"calibration/confidence_entropy": 0.4748932057775385,
|
|
"calibration/coverage@0%": 0.0046875,
|
|
"calibration/coverage@1%": 0.0046875,
|
|
"calibration/coverage@10%": 0.03671875,
|
|
"calibration/coverage@15%": 0.15174938725490197,
|
|
"calibration/coverage@20%": 0.2479810049019608,
|
|
"calibration/coverage@25%": 0.2894546568627451,
|
|
"calibration/coverage@30%": 0.34893688725490196,
|
|
"calibration/coverage@5%": 0.0046875,
|
|
"calibration/ece": 0.14829035447303923,
|
|
"calibration/mean_confidence": 0.46134567493872547,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 742.4,
|
|
"completions/max_terminated_length": 523.4,
|
|
"completions/mean_length": 163.4974609375,
|
|
"completions/mean_terminated_length": 163.36328125,
|
|
"completions/min_length": 75.8,
|
|
"completions/min_terminated_length": 75.8,
|
|
"epoch": 0.16,
|
|
"grad_norm": 0.0028037051670253277,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 167205538.0,
|
|
"reward": 0.9897387862205506,
|
|
"reward_std": 0.11076341718435287,
|
|
"rewards/accuracy_reward": 0.5359375,
|
|
"rewards/brier_reward": 0.7786368131637573,
|
|
"rewards/confidence_uniqueness_reward": 0.88655526638031,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.003066997462883592,
|
|
"rewards/frontier_coverage_1": 0.09699134379625321,
|
|
"rewards/frontier_coverage_10": 0.09699134379625321,
|
|
"rewards/frontier_coverage_15": 0.09699134379625321,
|
|
"rewards/frontier_coverage_20": 0.09699134379625321,
|
|
"rewards/frontier_coverage_25": 0.09699134379625321,
|
|
"rewards/frontier_coverage_5": 0.09699134379625321,
|
|
"rewards/frontier_ece_reward": 0.026462964341044427,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.14306640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1878939002752304,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071533203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.071533203125,
|
|
"signal/advantage_abs_mean": 0.08609444797039031,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08609444797039031,
|
|
"signal/advantage_pre_scale_std": 0.12765211164951323,
|
|
"signal/advantage_std": 0.12765211164951323,
|
|
"signal/brier_reward/centered_abs_mean": 0.16862273216247559,
|
|
"signal/brier_reward/group_std_mean": 0.2126835286617279,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021077841520309448,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021077841520309448,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.056052202731370925,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06837449967861176,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070065253414213656,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070065253414213656,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002428090269677341,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0038595238234847783,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.346281566540711e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.346281566540711e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.22630979716777802,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2893765389919281,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.22630979716777802,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2893765389919281,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.22630979716777802,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2893765389919281,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.22630979716777802,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2893765389919281,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.22630979716777802,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2893765389919281,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.22630979716777802,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2893765389919281,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004050945350900293,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04531662836670876,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.058810415863990786,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005664578545838595,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005664578545838595,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"eval_calibration/aurc": 0.4997777285356147,
|
|
"eval_calibration/batch_distribution_entropy": 0.9348213202225528,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9305712700671265,
|
|
"eval_calibration/confidence_entropy": 0.47102110753245985,
|
|
"eval_calibration/coverage@0%": 0.046875,
|
|
"eval_calibration/coverage@1%": 0.046875,
|
|
"eval_calibration/coverage@10%": 0.046875,
|
|
"eval_calibration/coverage@15%": 0.046875,
|
|
"eval_calibration/coverage@20%": 0.046875,
|
|
"eval_calibration/coverage@25%": 0.0625,
|
|
"eval_calibration/coverage@30%": 0.234375,
|
|
"eval_calibration/coverage@5%": 0.046875,
|
|
"eval_calibration/ece": 0.281164314516129,
|
|
"eval_calibration/mean_confidence": 0.5592389112903225,
|
|
"eval_completions/clipped_ratio": 0.002049180327868827,
|
|
"eval_completions/max_length": 911.5,
|
|
"eval_completions/max_terminated_length": 311.5,
|
|
"eval_completions/mean_length": 170.20043182373047,
|
|
"eval_completions/mean_terminated_length": 167.39978790283203,
|
|
"eval_completions/min_length": 84.0,
|
|
"eval_completions/min_terminated_length": 84.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 167205538.0,
|
|
"eval_reward": 0.90069180727005,
|
|
"eval_reward_std": 0.23488686978816986,
|
|
"eval_rewards/accuracy_reward": 0.361328125,
|
|
"eval_rewards/brier_reward": 0.7528277337551117,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8379772901535034,
|
|
"eval_rewards/format_reward": 0.998046875,
|
|
"eval_rewards/frontier_aurc_reward": -0.005042638164013624,
|
|
"eval_rewards/frontier_coverage_1": 0.18974924087524414,
|
|
"eval_rewards/frontier_coverage_10": 0.18974924087524414,
|
|
"eval_rewards/frontier_coverage_15": 0.18974924087524414,
|
|
"eval_rewards/frontier_coverage_20": 0.18974924087524414,
|
|
"eval_rewards/frontier_coverage_25": 0.18974924087524414,
|
|
"eval_rewards/frontier_coverage_5": 0.18974924087524414,
|
|
"eval_rewards/frontier_ece_reward": 0.01491912454366684,
|
|
"eval_runtime": 19.3233,
|
|
"eval_samples_per_second": 25.876,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4456787109375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.47858355939388275,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22283935546875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22283935546875,
|
|
"eval_signal/advantage_abs_mean": 0.20646706968545914,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20646706968545914,
|
|
"eval_signal/advantage_pre_scale_std": 0.23231954872608185,
|
|
"eval_signal/advantage_std": 0.23231954872608185,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22418075799942017,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2734896242618561,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02802259474992752,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02802259474992752,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07463713735342026,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09528587758541107,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009329642169177532,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009329642169177532,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004753857152536511,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007068477105349302,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.50940377858933e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.50940377858933e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.31350038945674896,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.39658913016319275,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.31350038945674896,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.39658913016319275,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.31350038945674896,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.39658913016319275,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.31350038945674896,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.39658913016319275,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.31350038945674896,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.39658913016319275,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.31350038945674896,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.39658913016319275,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005611656466498971,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05817369371652603,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.08309631422162056,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007271711714565754,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007271711714565754,
|
|
"eval_steps_per_second": 0.104,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"step": 50,
|
|
"train_probe_calibration/aurc": 0.2664804729817548,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.9324186885257131,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.9309467058219747,
|
|
"train_probe_calibration/confidence_entropy": 0.441220735421333,
|
|
"train_probe_calibration/coverage@0%": 0.03125,
|
|
"train_probe_calibration/coverage@1%": 0.03125,
|
|
"train_probe_calibration/coverage@10%": 0.03125,
|
|
"train_probe_calibration/coverage@15%": 0.46875,
|
|
"train_probe_calibration/coverage@20%": 0.484375,
|
|
"train_probe_calibration/coverage@25%": 0.578125,
|
|
"train_probe_calibration/coverage@30%": 0.640625,
|
|
"train_probe_calibration/coverage@5%": 0.03125,
|
|
"train_probe_calibration/ece": 0.17640625,
|
|
"train_probe_calibration/mean_confidence": 0.57484375,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 366.0,
|
|
"train_probe_completions/max_terminated_length": 366.0,
|
|
"train_probe_completions/mean_length": 165.32955932617188,
|
|
"train_probe_completions/mean_terminated_length": 165.32955932617188,
|
|
"train_probe_completions/min_length": 66.0,
|
|
"train_probe_completions/min_terminated_length": 66.0,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 167205538.0,
|
|
"train_probe_reward": 1.0041134357452393,
|
|
"train_probe_reward_std": 0.2359241172671318,
|
|
"train_probe_rewards/accuracy_reward": 0.57421875,
|
|
"train_probe_rewards/brier_reward": 0.7899730503559113,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.839599609375,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0027748874854296446,
|
|
"train_probe_rewards/frontier_coverage_1": 0.08121992275118828,
|
|
"train_probe_rewards/frontier_coverage_10": 0.08121992275118828,
|
|
"train_probe_rewards/frontier_coverage_15": 0.08121992275118828,
|
|
"train_probe_rewards/frontier_coverage_20": 0.08121992275118828,
|
|
"train_probe_rewards/frontier_coverage_25": 0.08121992275118828,
|
|
"train_probe_rewards/frontier_coverage_5": 0.08121992275118828,
|
|
"train_probe_rewards/frontier_ece_reward": 0.03707304783165455,
|
|
"train_probe_runtime": 9.186,
|
|
"train_probe_samples_per_second": 54.43,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.48193359375,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.49865010380744934,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.240966796875,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.240966796875,
|
|
"train_probe_signal/advantage_abs_mean": 0.2185721918940544,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.2185721918940544,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.23293063789606094,
|
|
"train_probe_signal/advantage_std": 0.23293063789606094,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.20220646262168884,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.24830978363752365,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025275807827711105,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.025275807827711105,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.076904296875,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.09100573509931564,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009613037109375,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009613037109375,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.004000097163952887,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.00605845358222723,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.160173117881641e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.160173117881641e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30071887373924255,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4058589041233063,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30071887373924255,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.4058589041233063,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.30071887373924255,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.4058589041233063,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.30071887373924255,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.4058589041233063,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.30071887373924255,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.4058589041233063,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30071887373924255,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4058589041233063,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0053828677628189325,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.06303473375737667,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.08334130793809891,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007879341719672084,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007879341719672084,
|
|
"train_probe_steps_per_second": 0.218
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2950848289576411,
|
|
"calibration/batch_distribution_entropy": 0.930281332331097,
|
|
"calibration/buffer_distribution_entropy": 0.9341737996692416,
|
|
"calibration/confidence_entropy": 0.4755722991194964,
|
|
"calibration/coverage@0%": 0.01484375,
|
|
"calibration/coverage@1%": 0.01484375,
|
|
"calibration/coverage@10%": 0.16171875,
|
|
"calibration/coverage@15%": 0.2046875,
|
|
"calibration/coverage@20%": 0.2859375,
|
|
"calibration/coverage@25%": 0.3765625,
|
|
"calibration/coverage@30%": 0.5828125,
|
|
"calibration/coverage@5%": 0.10234375,
|
|
"calibration/ece": 0.1771126289368873,
|
|
"calibration/mean_confidence": 0.5750459494944853,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1084.8,
|
|
"completions/max_terminated_length": 497.4,
|
|
"completions/mean_length": 168.68369140625,
|
|
"completions/mean_terminated_length": 168.2832794189453,
|
|
"completions/min_length": 78.0,
|
|
"completions/min_terminated_length": 78.0,
|
|
"epoch": 0.176,
|
|
"grad_norm": 0.003821933874860406,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 184169979.0,
|
|
"reward": 0.9811482787132263,
|
|
"reward_std": 0.1093181312084198,
|
|
"rewards/accuracy_reward": 0.52294921875,
|
|
"rewards/brier_reward": 0.7728787422180176,
|
|
"rewards/confidence_uniqueness_reward": 0.8689801812171936,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.003453007619827986,
|
|
"rewards/frontier_coverage_1": 0.10514852032065392,
|
|
"rewards/frontier_coverage_10": 0.10514852032065392,
|
|
"rewards/frontier_coverage_15": 0.10514852032065392,
|
|
"rewards/frontier_coverage_20": 0.10514852032065392,
|
|
"rewards/frontier_coverage_25": 0.10514852032065392,
|
|
"rewards/frontier_coverage_5": 0.10514852032065392,
|
|
"rewards/frontier_ece_reward": 0.02763434946537018,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.128729248046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1696704939007759,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0643646240234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0643646240234375,
|
|
"signal/advantage_abs_mean": 0.0838969498872757,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0838969498872757,
|
|
"signal/advantage_pre_scale_std": 0.12963834255933762,
|
|
"signal/advantage_std": 0.12963834255933762,
|
|
"signal/brier_reward/centered_abs_mean": 0.16208215355873107,
|
|
"signal/brier_reward/group_std_mean": 0.20663413107395173,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020260269194841383,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020260269194841383,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07365219593048096,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08795170336961747,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00920652449131012,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00920652449131012,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002859164075925946,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004245653934776783,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.117903638165444e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.117903638165444e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1915457785129547,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.24914441704750062,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1915457785129547,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.24914441704750062,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1915457785129547,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.24914441704750062,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1915457785129547,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.24914441704750062,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1915457785129547,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.24914441704750062,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1915457785129547,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.24914441704750062,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003428669273853302,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04376091659069061,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05602394491434097,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005470114573836326,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005470114573836326,
|
|
"step": 55
|
|
},
|
|
{
|
|
"calibration/aurc": 0.331854588779599,
|
|
"calibration/batch_distribution_entropy": 0.8745741485620421,
|
|
"calibration/buffer_distribution_entropy": 0.9365178507635724,
|
|
"calibration/confidence_entropy": 0.4012815190096622,
|
|
"calibration/coverage@0%": 0.003125,
|
|
"calibration/coverage@1%": 0.003125,
|
|
"calibration/coverage@10%": 0.05234375,
|
|
"calibration/coverage@15%": 0.140625,
|
|
"calibration/coverage@20%": 0.32421875,
|
|
"calibration/coverage@25%": 0.3828125,
|
|
"calibration/coverage@30%": 0.5078125,
|
|
"calibration/coverage@5%": 0.003125,
|
|
"calibration/ece": 0.173107109375,
|
|
"calibration/mean_confidence": 0.6393742968749999,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 829.0,
|
|
"completions/max_terminated_length": 608.8,
|
|
"completions/mean_length": 167.47001953125,
|
|
"completions/mean_terminated_length": 167.33668518066406,
|
|
"completions/min_length": 78.8,
|
|
"completions/min_terminated_length": 78.8,
|
|
"epoch": 0.192,
|
|
"grad_norm": 0.004035938531160355,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 200699688.0,
|
|
"reward": 0.9879758596420288,
|
|
"reward_std": 0.11625557094812393,
|
|
"rewards/accuracy_reward": 0.53916015625,
|
|
"rewards/brier_reward": 0.7739283800125122,
|
|
"rewards/confidence_uniqueness_reward": 0.8580935597419739,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0036836853716522453,
|
|
"rewards/frontier_coverage_1": 0.10152835100889206,
|
|
"rewards/frontier_coverage_10": 0.10152835100889206,
|
|
"rewards/frontier_coverage_15": 0.10152835100889206,
|
|
"rewards/frontier_coverage_20": 0.10152835100889206,
|
|
"rewards/frontier_coverage_25": 0.10152835100889206,
|
|
"rewards/frontier_coverage_5": 0.10152835100889206,
|
|
"rewards/frontier_ece_reward": 0.030000920966267587,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.136065673828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.179530268907547,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.484375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0680328369140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0680328369140625,
|
|
"signal/advantage_abs_mean": 0.08931153416633605,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08931153416633605,
|
|
"signal/advantage_pre_scale_std": 0.13905880898237227,
|
|
"signal/advantage_std": 0.13905880898237227,
|
|
"signal/brier_reward/centered_abs_mean": 0.17017331421375276,
|
|
"signal/brier_reward/group_std_mean": 0.21759623885154725,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021271664276719095,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021271664276719095,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09090490639209747,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.10824680477380752,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011363113299012184,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011363113299012184,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036389449145644904,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0054845036007463936,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.513711123261601e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.513711123261601e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17842654287815093,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23917962312698365,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17842654287815093,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23917962312698365,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17842654287815093,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23917962312698365,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17842654287815093,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23917962312698365,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17842654287815093,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23917962312698365,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17842654287815093,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23917962312698365,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003193834982812405,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04488262310624123,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.056452129036188126,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0056103278882801534,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0056103278882801534,
|
|
"step": 60
|
|
},
|
|
{
|
|
"calibration/aurc": 0.32373530576954757,
|
|
"calibration/batch_distribution_entropy": 0.8591898290330275,
|
|
"calibration/buffer_distribution_entropy": 0.9347868571410448,
|
|
"calibration/confidence_entropy": 0.3849134860388972,
|
|
"calibration/coverage@0%": 0.00859375,
|
|
"calibration/coverage@1%": 0.00859375,
|
|
"calibration/coverage@10%": 0.09375,
|
|
"calibration/coverage@15%": 0.24765625,
|
|
"calibration/coverage@20%": 0.36015625,
|
|
"calibration/coverage@25%": 0.546875,
|
|
"calibration/coverage@30%": 0.65546875,
|
|
"calibration/coverage@5%": 0.00859375,
|
|
"calibration/ece": 0.18438471015114383,
|
|
"calibration/mean_confidence": 0.6311039438521242,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 873.6,
|
|
"completions/max_terminated_length": 432.2,
|
|
"completions/mean_length": 166.7802734375,
|
|
"completions/mean_terminated_length": 166.3782531738281,
|
|
"completions/min_length": 72.8,
|
|
"completions/min_terminated_length": 72.8,
|
|
"epoch": 0.208,
|
|
"grad_norm": 0.0025081464555114508,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 217439742.0,
|
|
"reward": 1.0152548551559448,
|
|
"reward_std": 0.11149686425924302,
|
|
"rewards/accuracy_reward": 0.58408203125,
|
|
"rewards/brier_reward": 0.7946485996246337,
|
|
"rewards/confidence_uniqueness_reward": 0.8775890946388245,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0030152024235576393,
|
|
"rewards/frontier_coverage_1": 0.09387013614177704,
|
|
"rewards/frontier_coverage_10": 0.09387013614177704,
|
|
"rewards/frontier_coverage_15": 0.09387013614177704,
|
|
"rewards/frontier_coverage_20": 0.09387013614177704,
|
|
"rewards/frontier_coverage_25": 0.09387013614177704,
|
|
"rewards/frontier_coverage_5": 0.09387013614177704,
|
|
"rewards/frontier_ece_reward": 0.03520463481545448,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.121368408203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1650165855884552,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.515625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0606842041015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0606842041015625,
|
|
"signal/advantage_abs_mean": 0.08337271958589554,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08337271958589554,
|
|
"signal/advantage_pre_scale_std": 0.13434576690196992,
|
|
"signal/advantage_std": 0.13434576690196992,
|
|
"signal/brier_reward/centered_abs_mean": 0.16763521134853362,
|
|
"signal/brier_reward/group_std_mean": 0.21714569628238678,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020954401418566703,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020954401418566703,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08260471224784852,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0987936407327652,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010325589030981065,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010325589030981065,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.002762135770171881,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003630819218233228,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005773447826504708,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.499166338471695e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.499166338471695e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17451754212379456,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23370930552482605,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17451754212379456,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23370930552482605,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17451754212379456,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23370930552482605,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17451754212379456,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23370930552482605,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17451754212379456,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23370930552482605,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17451754212379456,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23370930552482605,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031238638795912264,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.04130900949239731,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.05173059701919556,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005163626186549664,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005163626186549664,
|
|
"step": 65
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3356664044008323,
|
|
"calibration/batch_distribution_entropy": 0.9384537311646113,
|
|
"calibration/buffer_distribution_entropy": 0.9376239395466122,
|
|
"calibration/confidence_entropy": 0.40272087778388677,
|
|
"calibration/coverage@0%": 0.03441176470588235,
|
|
"calibration/coverage@1%": 0.03441176470588235,
|
|
"calibration/coverage@10%": 0.1807077205882353,
|
|
"calibration/coverage@15%": 0.2613296568627451,
|
|
"calibration/coverage@20%": 0.310640318627451,
|
|
"calibration/coverage@25%": 0.3513939950980392,
|
|
"calibration/coverage@30%": 0.5038296568627451,
|
|
"calibration/coverage@5%": 0.0696966911764706,
|
|
"calibration/ece": 0.1596415861961429,
|
|
"calibration/mean_confidence": 0.5144321171091331,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00087890625,
|
|
"completions/max_length": 1094.8,
|
|
"completions/max_terminated_length": 759.4,
|
|
"completions/mean_length": 166.6509765625,
|
|
"completions/mean_terminated_length": 165.4495086669922,
|
|
"completions/min_length": 77.0,
|
|
"completions/min_terminated_length": 77.0,
|
|
"epoch": 0.224,
|
|
"grad_norm": 0.0021792047191411257,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0025,
|
|
"num_tokens": 234299432.0,
|
|
"reward": 1.0031249046325683,
|
|
"reward_std": 0.10735798627138138,
|
|
"rewards/accuracy_reward": 0.55029296875,
|
|
"rewards/brier_reward": 0.7902113795280457,
|
|
"rewards/confidence_uniqueness_reward": 0.8990541458129883,
|
|
"rewards/format_reward": 0.99912109375,
|
|
"rewards/frontier_aurc_reward": -0.003141326270997524,
|
|
"rewards/frontier_coverage_1": 0.12729544788599015,
|
|
"rewards/frontier_coverage_10": 0.12729544788599015,
|
|
"rewards/frontier_coverage_15": 0.12729544788599015,
|
|
"rewards/frontier_coverage_20": 0.12729544788599015,
|
|
"rewards/frontier_coverage_25": 0.12729544788599015,
|
|
"rewards/frontier_coverage_5": 0.12729544788599015,
|
|
"rewards/frontier_ece_reward": 0.029154983535408973,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.120965576171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.16113831400871276,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0604827880859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0604827880859375,
|
|
"signal/advantage_abs_mean": 0.08187931925058364,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08187931925058364,
|
|
"signal/advantage_pre_scale_std": 0.13045653700828552,
|
|
"signal/advantage_std": 0.13045653700828552,
|
|
"signal/brier_reward/centered_abs_mean": 0.17681266367435455,
|
|
"signal/brier_reward/group_std_mean": 0.2258577436208725,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02210158295929432,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.02210158295929432,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0674636647105217,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.08297923505306244,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008432958088815213,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008432958088815213,
|
|
"signal/format_reward/centered_abs_mean": 0.001702880859375,
|
|
"signal/format_reward/group_std_mean": 0.004971844516694546,
|
|
"signal/format_reward/group_zero_std_frac": 0.971875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036583705339580776,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0056047579273581505,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.548483070218935e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.548483070218935e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19972024559974672,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.26246256828308107,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19972024559974672,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.26246256828308107,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19972024559974672,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.26246256828308107,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19972024559974672,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.26246256828308107,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19972024559974672,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.26246256828308107,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19972024559974672,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.26246256828308107,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00357499229721725,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03642488420009613,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04521550685167312,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0045531105250120165,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0045531105250120165,
|
|
"step": 70
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3522414944051539,
|
|
"calibration/batch_distribution_entropy": 0.8803572798405815,
|
|
"calibration/buffer_distribution_entropy": 0.9410783570721908,
|
|
"calibration/confidence_entropy": 0.36782506176165003,
|
|
"calibration/coverage@0%": 0.01875,
|
|
"calibration/coverage@1%": 0.01875,
|
|
"calibration/coverage@10%": 0.09140625,
|
|
"calibration/coverage@15%": 0.128125,
|
|
"calibration/coverage@20%": 0.16328125,
|
|
"calibration/coverage@25%": 0.36796875,
|
|
"calibration/coverage@30%": 0.496875,
|
|
"calibration/coverage@5%": 0.0328125,
|
|
"calibration/ece": 0.1927504647077757,
|
|
"calibration/mean_confidence": 0.5310109334577756,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1190.8,
|
|
"completions/max_terminated_length": 514.6,
|
|
"completions/mean_length": 167.90107421875,
|
|
"completions/mean_terminated_length": 167.3661346435547,
|
|
"completions/min_length": 70.2,
|
|
"completions/min_terminated_length": 70.2,
|
|
"epoch": 0.24,
|
|
"grad_norm": 0.0024263551458716393,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 251270419.0,
|
|
"reward": 1.0291972041130066,
|
|
"reward_std": 0.10552183389663697,
|
|
"rewards/accuracy_reward": 0.60859375,
|
|
"rewards/brier_reward": 0.7870316863059997,
|
|
"rewards/confidence_uniqueness_reward": 0.91382737159729,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.0025115890428423883,
|
|
"rewards/frontier_coverage_1": 0.08323012106120586,
|
|
"rewards/frontier_coverage_10": 0.08323012106120586,
|
|
"rewards/frontier_coverage_15": 0.08323012106120586,
|
|
"rewards/frontier_coverage_20": 0.08323012106120586,
|
|
"rewards/frontier_coverage_25": 0.08323012106120586,
|
|
"rewards/frontier_coverage_5": 0.08323012106120586,
|
|
"rewards/frontier_ece_reward": 0.02914494350552559,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1296142578125,
|
|
"signal/accuracy_reward/group_std_mean": 0.16963129937648774,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.521875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06480712890625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06480712890625,
|
|
"signal/advantage_abs_mean": 0.08079658448696136,
|
|
"signal/advantage_pre_scale_abs_mean": 0.08079658448696136,
|
|
"signal/advantage_pre_scale_std": 0.12934576272964476,
|
|
"signal/advantage_std": 0.12934576272964476,
|
|
"signal/brier_reward/centered_abs_mean": 0.17554612457752228,
|
|
"signal/brier_reward/group_std_mean": 0.2262921988964081,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021943265572190285,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021943265572190285,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05461069941520691,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06644331142306328,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006826337426900864,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006826337426900864,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003250430291518569,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.005101799964904785,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.818270146846771e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.818270146846771e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.2036992698907852,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2712432205677032,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.2036992698907852,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2712432205677032,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.2036992698907852,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2712432205677032,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.2036992698907852,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2712432205677032,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.2036992698907852,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2712432205677032,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.2036992698907852,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2712432205677032,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003646216681227088,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.03291768655180931,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.04102036878466606,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041147108189761635,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041147108189761635,
|
|
"step": 75
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21512356164155774,
|
|
"calibration/batch_distribution_entropy": 0.8827675644974178,
|
|
"calibration/buffer_distribution_entropy": 0.9422732160404657,
|
|
"calibration/confidence_entropy": 0.36174526720048983,
|
|
"calibration/coverage@0%": 0.06015625,
|
|
"calibration/coverage@1%": 0.06015625,
|
|
"calibration/coverage@10%": 0.2484375,
|
|
"calibration/coverage@15%": 0.4230974264705882,
|
|
"calibration/coverage@20%": 0.5403370098039215,
|
|
"calibration/coverage@25%": 0.6317861519607844,
|
|
"calibration/coverage@30%": 0.7466666666666667,
|
|
"calibration/coverage@5%": 0.1515625,
|
|
"calibration/ece": 0.1830301858879185,
|
|
"calibration/mean_confidence": 0.5492079764272219,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 892.6,
|
|
"completions/max_terminated_length": 458.0,
|
|
"completions/mean_length": 164.4017578125,
|
|
"completions/mean_terminated_length": 163.9999572753906,
|
|
"completions/min_length": 75.8,
|
|
"completions/min_terminated_length": 75.8,
|
|
"epoch": 0.256,
|
|
"grad_norm": 0.0027514868415892124,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 268008709.0,
|
|
"reward": 1.0226893305778504,
|
|
"reward_std": 0.09834913462400437,
|
|
"rewards/accuracy_reward": 0.58173828125,
|
|
"rewards/brier_reward": 0.8035358548164367,
|
|
"rewards/confidence_uniqueness_reward": 0.9133859038352966,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0023928165435791017,
|
|
"rewards/frontier_coverage_1": 0.12931015118956565,
|
|
"rewards/frontier_coverage_10": 0.12931015118956565,
|
|
"rewards/frontier_coverage_15": 0.12931015118956565,
|
|
"rewards/frontier_coverage_20": 0.12931015118956565,
|
|
"rewards/frontier_coverage_25": 0.12931015118956565,
|
|
"rewards/frontier_coverage_5": 0.12931015118956565,
|
|
"rewards/frontier_ece_reward": 0.028441504389047623,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.123785400390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1612432286143303,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0618927001953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0618927001953125,
|
|
"signal/advantage_abs_mean": 0.07478999271988869,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07478999271988869,
|
|
"signal/advantage_pre_scale_std": 0.12347659170627594,
|
|
"signal/advantage_std": 0.12347659170627594,
|
|
"signal/brier_reward/centered_abs_mean": 0.16572422683238983,
|
|
"signal/brier_reward/group_std_mean": 0.2140843689441681,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020715528354048728,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020715528354048728,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05513819307088852,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.06670184880495071,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006892274133861065,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006892274133861065,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.003011533757671714,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0047927751205861565,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.39064516487997e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.39064516487997e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19958887100219727,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2646804749965668,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19958887100219727,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2646804749965668,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19958887100219727,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2646804749965668,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19958887100219727,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2646804749965668,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19958887100219727,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2646804749965668,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19958887100219727,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2646804749965668,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035726406611502172,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02867573909461498,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.03616860210895538,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035844673868268727,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035844673868268727,
|
|
"step": 80
|
|
},
|
|
{
|
|
"calibration/aurc": 0.21649586779760063,
|
|
"calibration/batch_distribution_entropy": 0.8773805631285727,
|
|
"calibration/buffer_distribution_entropy": 0.9423614066226536,
|
|
"calibration/confidence_entropy": 0.3884055467369814,
|
|
"calibration/coverage@0%": 0.0109375,
|
|
"calibration/coverage@1%": 0.0109375,
|
|
"calibration/coverage@10%": 0.18828125,
|
|
"calibration/coverage@15%": 0.38828125,
|
|
"calibration/coverage@20%": 0.47734375,
|
|
"calibration/coverage@25%": 0.74609375,
|
|
"calibration/coverage@30%": 0.8078125,
|
|
"calibration/coverage@5%": 0.0921875,
|
|
"calibration/ece": 0.12636070145032918,
|
|
"calibration/mean_confidence": 0.6323173064632307,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1088.2,
|
|
"completions/max_terminated_length": 434.4,
|
|
"completions/mean_length": 169.712890625,
|
|
"completions/mean_terminated_length": 169.04539794921874,
|
|
"completions/min_length": 74.4,
|
|
"completions/min_terminated_length": 74.4,
|
|
"epoch": 0.272,
|
|
"grad_norm": 0.0028915083967149258,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0021,
|
|
"num_tokens": 284712265.0,
|
|
"reward": 1.0215874552726745,
|
|
"reward_std": 0.10492411553859711,
|
|
"rewards/accuracy_reward": 0.58447265625,
|
|
"rewards/brier_reward": 0.7885427713394165,
|
|
"rewards/confidence_uniqueness_reward": 0.9305692434310913,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0025600562803447247,
|
|
"rewards/frontier_coverage_1": 0.11024373397231102,
|
|
"rewards/frontier_coverage_10": 0.11024373397231102,
|
|
"rewards/frontier_coverage_15": 0.11024373397231102,
|
|
"rewards/frontier_coverage_20": 0.11024373397231102,
|
|
"rewards/frontier_coverage_25": 0.11024373397231102,
|
|
"rewards/frontier_coverage_5": 0.11024373397231102,
|
|
"rewards/frontier_ece_reward": 0.024467223882675172,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.121722412109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.16252617239952089,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0608612060546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0608612060546875,
|
|
"signal/advantage_abs_mean": 0.07876608818769455,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07876608818769455,
|
|
"signal/advantage_pre_scale_std": 0.12760126292705537,
|
|
"signal/advantage_std": 0.12760126292705537,
|
|
"signal/brier_reward/centered_abs_mean": 0.1705150604248047,
|
|
"signal/brier_reward/group_std_mean": 0.22130897045135497,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021314382553100586,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.021314382553100586,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04144674874842167,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.05355666503310204,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005180843593552709,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005180843593552709,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002936669299378991,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0046648337505757805,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.256637814454734e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.256637814454734e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19893713295459747,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2645448505878448,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19893713295459747,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2645448505878448,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19893713295459747,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2645448505878448,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19893713295459747,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2645448505878448,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19893713295459747,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2645448505878448,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19893713295459747,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2645448505878448,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035609744023531674,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.027610136568546294,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.034552381932735445,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0034512670710682867,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0034512670710682867,
|
|
"step": 85
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24803686043807643,
|
|
"calibration/batch_distribution_entropy": 0.8444372092396852,
|
|
"calibration/buffer_distribution_entropy": 0.9429605942110868,
|
|
"calibration/confidence_entropy": 0.36471631140917615,
|
|
"calibration/coverage@0%": 0.0421875,
|
|
"calibration/coverage@1%": 0.0421875,
|
|
"calibration/coverage@10%": 0.2828125,
|
|
"calibration/coverage@15%": 0.32890625,
|
|
"calibration/coverage@20%": 0.4796875,
|
|
"calibration/coverage@25%": 0.546875,
|
|
"calibration/coverage@30%": 0.6203125,
|
|
"calibration/coverage@5%": 0.196875,
|
|
"calibration/ece": 0.12333755567361956,
|
|
"calibration/mean_confidence": 0.6039962674593244,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1108.2,
|
|
"completions/max_terminated_length": 514.0,
|
|
"completions/mean_length": 166.53251953125,
|
|
"completions/mean_terminated_length": 166.13165893554688,
|
|
"completions/min_length": 74.6,
|
|
"completions/min_terminated_length": 74.6,
|
|
"epoch": 0.288,
|
|
"grad_norm": 0.0030995451379567385,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 301375734.0,
|
|
"reward": 1.0154645323753357,
|
|
"reward_std": 0.10347330272197723,
|
|
"rewards/accuracy_reward": 0.5708984375,
|
|
"rewards/brier_reward": 0.7860757231712341,
|
|
"rewards/confidence_uniqueness_reward": 0.9404593467712402,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.002836257731541991,
|
|
"rewards/frontier_coverage_1": 0.1086883544921875,
|
|
"rewards/frontier_coverage_10": 0.1086883544921875,
|
|
"rewards/frontier_coverage_15": 0.1086883544921875,
|
|
"rewards/frontier_coverage_20": 0.1086883544921875,
|
|
"rewards/frontier_coverage_25": 0.1086883544921875,
|
|
"rewards/frontier_coverage_5": 0.1086883544921875,
|
|
"rewards/frontier_ece_reward": 0.022170854546129704,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.1334228515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.17499251067638397,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.503125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06671142578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06671142578125,
|
|
"signal/advantage_abs_mean": 0.07898036390542984,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07898036390542984,
|
|
"signal/advantage_pre_scale_std": 0.12728380411863327,
|
|
"signal/advantage_std": 0.12728380411863327,
|
|
"signal/brier_reward/centered_abs_mean": 0.16704794466495515,
|
|
"signal/brier_reward/group_std_mean": 0.21451664268970488,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020880993083119394,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.020880993083119394,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0323214516043663,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04215872809290886,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004040181450545788,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004040181450545788,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086533904076,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028704125434160233,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004553637374192477,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1380382501520216e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1380382501520216e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.19622489511966706,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2572601854801178,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.19622489511966706,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2572601854801178,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.19622489511966706,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2572601854801178,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.19622489511966706,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2572601854801178,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.19622489511966706,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2572601854801178,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.19622489511966706,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2572601854801178,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003512425487861037,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.026541993021965027,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.032760906219482425,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033177491277456284,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033177491277456284,
|
|
"step": 90
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2445192555610003,
|
|
"calibration/batch_distribution_entropy": 0.8722010859854628,
|
|
"calibration/buffer_distribution_entropy": 0.9413355201822619,
|
|
"calibration/confidence_entropy": 0.3738584132253522,
|
|
"calibration/coverage@0%": 0.015625,
|
|
"calibration/coverage@1%": 0.015625,
|
|
"calibration/coverage@10%": 0.2203125,
|
|
"calibration/coverage@15%": 0.3078125,
|
|
"calibration/coverage@20%": 0.49765625,
|
|
"calibration/coverage@25%": 0.62578125,
|
|
"calibration/coverage@30%": 0.70390625,
|
|
"calibration/coverage@5%": 0.05859375,
|
|
"calibration/ece": 0.12586600898720163,
|
|
"calibration/mean_confidence": 0.5935988981383198,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 929.8,
|
|
"completions/max_terminated_length": 490.4,
|
|
"completions/mean_length": 169.30595703125,
|
|
"completions/mean_terminated_length": 169.03911743164062,
|
|
"completions/min_length": 80.8,
|
|
"completions/min_terminated_length": 80.8,
|
|
"epoch": 0.304,
|
|
"grad_norm": 0.0025018032174557447,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 318039379.0,
|
|
"reward": 1.0334564208984376,
|
|
"reward_std": 0.09357217103242874,
|
|
"rewards/accuracy_reward": 0.60791015625,
|
|
"rewards/brier_reward": 0.7965248703956604,
|
|
"rewards/confidence_uniqueness_reward": 0.9396258115768432,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0024371820967644454,
|
|
"rewards/frontier_coverage_1": 0.09061380345374345,
|
|
"rewards/frontier_coverage_10": 0.09061380345374345,
|
|
"rewards/frontier_coverage_15": 0.09061380345374345,
|
|
"rewards/frontier_coverage_20": 0.09061380345374345,
|
|
"rewards/frontier_coverage_25": 0.09061380345374345,
|
|
"rewards/frontier_coverage_5": 0.09061380345374345,
|
|
"rewards/frontier_ece_reward": 0.023134828731417655,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.111419677734375,
|
|
"signal/accuracy_reward/group_std_mean": 0.14964892268180846,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0557098388671875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0557098388671875,
|
|
"signal/advantage_abs_mean": 0.07044639587402343,
|
|
"signal/advantage_pre_scale_abs_mean": 0.07044639587402343,
|
|
"signal/advantage_pre_scale_std": 0.11818494796752929,
|
|
"signal/advantage_std": 0.11818494796752929,
|
|
"signal/brier_reward/centered_abs_mean": 0.15556592047214507,
|
|
"signal/brier_reward/group_std_mean": 0.20124119520187378,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019445740059018134,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.019445740059018134,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031480921804904936,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04011792093515396,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003935115225613117,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003935115225613117,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002605660632252693,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004119851719588041,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.664132356992923e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.664132356992923e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17928916215896606,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23793997764587402,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17928916215896606,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23793997764587402,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17928916215896606,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23793997764587402,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17928916215896606,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23793997764587402,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17928916215896606,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.23793997764587402,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17928916215896606,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23793997764587402,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032092759851366282,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02426176182925701,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.030135614797472954,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030327202286571263,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030327202286571263,
|
|
"step": 95
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1958580706055439,
|
|
"calibration/batch_distribution_entropy": 0.8434134033250091,
|
|
"calibration/buffer_distribution_entropy": 0.9401943668455596,
|
|
"calibration/confidence_entropy": 0.36734551027565937,
|
|
"calibration/coverage@0%": 0.032889093137254896,
|
|
"calibration/coverage@1%": 0.032889093137254896,
|
|
"calibration/coverage@10%": 0.2699050245098039,
|
|
"calibration/coverage@15%": 0.4144822303921568,
|
|
"calibration/coverage@20%": 0.6098314950980392,
|
|
"calibration/coverage@25%": 0.7403707107843138,
|
|
"calibration/coverage@30%": 0.8201439950980391,
|
|
"calibration/coverage@5%": 0.15497242647058823,
|
|
"calibration/ece": 0.1445590710969227,
|
|
"calibration/mean_confidence": 0.6401742367558774,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1306.8,
|
|
"completions/max_terminated_length": 501.4,
|
|
"completions/mean_length": 168.332421875,
|
|
"completions/mean_terminated_length": 167.664208984375,
|
|
"completions/min_length": 80.2,
|
|
"completions/min_terminated_length": 80.2,
|
|
"epoch": 0.32,
|
|
"grad_norm": 0.0026968803722411394,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0017,
|
|
"num_tokens": 334851807.0,
|
|
"reward": 1.0260323882102966,
|
|
"reward_std": 0.08396224528551102,
|
|
"rewards/accuracy_reward": 0.585546875,
|
|
"rewards/brier_reward": 0.8071523427963256,
|
|
"rewards/confidence_uniqueness_reward": 0.9357501029968261,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.002748763840645552,
|
|
"rewards/frontier_coverage_1": 0.11854975577443838,
|
|
"rewards/frontier_coverage_10": 0.11854975577443838,
|
|
"rewards/frontier_coverage_15": 0.11854975577443838,
|
|
"rewards/frontier_coverage_20": 0.11854975577443838,
|
|
"rewards/frontier_coverage_25": 0.11854975577443838,
|
|
"rewards/frontier_coverage_5": 0.11854975577443838,
|
|
"rewards/frontier_ece_reward": 0.02365802228450775,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08590087890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1201841339468956,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042950439453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042950439453125,
|
|
"signal/advantage_abs_mean": 0.06151105165481567,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06151105165481567,
|
|
"signal/advantage_pre_scale_std": 0.10997560620307922,
|
|
"signal/advantage_std": 0.10997560620307922,
|
|
"signal/brier_reward/centered_abs_mean": 0.14419465661048889,
|
|
"signal/brier_reward/group_std_mean": 0.18665907382965088,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01802433207631111,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01802433207631111,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03390970081090927,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04388536140322685,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004238712601363659,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004238712601363659,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002838291879743338,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004497240483760834,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.080542250652797e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.080542250652797e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1544642448425293,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20439959168434144,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1544642448425293,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20439959168434144,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1544642448425293,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20439959168434144,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1544642448425293,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20439959168434144,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1544642448425293,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.20439959168434144,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1544642448425293,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20439959168434144,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027649099007248878,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02174353301525116,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02732553631067276,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002717941626906395,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002717941626906395,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"eval_calibration/aurc": 0.5706461595447073,
|
|
"eval_calibration/batch_distribution_entropy": 0.8726977897672046,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9393171128839451,
|
|
"eval_calibration/confidence_entropy": 0.4035573482539014,
|
|
"eval_calibration/coverage@0%": 0.015625,
|
|
"eval_calibration/coverage@1%": 0.015625,
|
|
"eval_calibration/coverage@10%": 0.015625,
|
|
"eval_calibration/coverage@15%": 0.015625,
|
|
"eval_calibration/coverage@20%": 0.015625,
|
|
"eval_calibration/coverage@25%": 0.015625,
|
|
"eval_calibration/coverage@30%": 0.015625,
|
|
"eval_calibration/coverage@5%": 0.015625,
|
|
"eval_calibration/ece": 0.24265838290022357,
|
|
"eval_calibration/mean_confidence": 0.556418300597109,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 330.0,
|
|
"eval_completions/max_terminated_length": 330.0,
|
|
"eval_completions/mean_length": 171.14369201660156,
|
|
"eval_completions/mean_terminated_length": 171.14369201660156,
|
|
"eval_completions/min_length": 96.0,
|
|
"eval_completions/min_terminated_length": 96.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 334851807.0,
|
|
"eval_reward": 0.940795511007309,
|
|
"eval_reward_std": 0.23958701640367508,
|
|
"eval_rewards/accuracy_reward": 0.423828125,
|
|
"eval_rewards/brier_reward": 0.7562253475189209,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.89501953125,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.00461793364956975,
|
|
"eval_rewards/frontier_coverage_1": 0.19424864649772644,
|
|
"eval_rewards/frontier_coverage_10": 0.19424864649772644,
|
|
"eval_rewards/frontier_coverage_15": 0.19424864649772644,
|
|
"eval_rewards/frontier_coverage_20": 0.19424864649772644,
|
|
"eval_rewards/frontier_coverage_25": 0.19424864649772644,
|
|
"eval_rewards/frontier_coverage_5": 0.19424864649772644,
|
|
"eval_rewards/frontier_ece_reward": 0.013569748029112816,
|
|
"eval_runtime": 9.8344,
|
|
"eval_samples_per_second": 50.842,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4708251953125,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4925154745578766,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23541259765625,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23541259765625,
|
|
"eval_signal/advantage_abs_mean": 0.21671650558710098,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21671650558710098,
|
|
"eval_signal/advantage_pre_scale_std": 0.23654372990131378,
|
|
"eval_signal/advantage_std": 0.23654372990131378,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2694649398326874,
|
|
"eval_signal/brier_reward/group_std_mean": 0.32334399223327637,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03368311747908592,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.03368311747908592,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0469818115234375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.056731242686510086,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0058727264404296875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0058727264404296875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005505842389538884,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009029718115925789,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.855458120000549e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.855458120000549e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3532957285642624,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.44448477029800415,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3532957285642624,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.44448477029800415,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3532957285642624,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.44448477029800415,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3532957285642624,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.44448477029800415,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3532957285642624,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.44448477029800415,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3532957285642624,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.44448477029800415,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006323992973193526,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.033544719219207764,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.04166281037032604,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041930899024009705,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041930899024009705,
|
|
"eval_steps_per_second": 0.203,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"step": 100,
|
|
"train_probe_calibration/aurc": 0.19455582700187118,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.8136888433586318,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.939399714843496,
|
|
"train_probe_calibration/confidence_entropy": 0.3994395901362911,
|
|
"train_probe_calibration/coverage@0%": 0.125,
|
|
"train_probe_calibration/coverage@1%": 0.125,
|
|
"train_probe_calibration/coverage@10%": 0.296875,
|
|
"train_probe_calibration/coverage@15%": 0.5625,
|
|
"train_probe_calibration/coverage@20%": 0.609375,
|
|
"train_probe_calibration/coverage@25%": 0.84375,
|
|
"train_probe_calibration/coverage@30%": 0.953125,
|
|
"train_probe_calibration/coverage@5%": 0.125,
|
|
"train_probe_calibration/ece": 0.17050834345107496,
|
|
"train_probe_calibration/mean_confidence": 0.6579534095658197,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 360.5,
|
|
"train_probe_completions/max_terminated_length": 360.5,
|
|
"train_probe_completions/mean_length": 168.69681549072266,
|
|
"train_probe_completions/mean_terminated_length": 168.69681549072266,
|
|
"train_probe_completions/min_length": 83.5,
|
|
"train_probe_completions/min_terminated_length": 83.5,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 334851807.0,
|
|
"train_probe_reward": 1.0333038568496704,
|
|
"train_probe_reward_std": 0.23331268876791,
|
|
"train_probe_rewards/accuracy_reward": 0.61328125,
|
|
"train_probe_rewards/brier_reward": 0.8115493357181549,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.89208984375,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0020465875859372318,
|
|
"train_probe_rewards/frontier_coverage_1": 0.10023730993270874,
|
|
"train_probe_rewards/frontier_coverage_10": 0.10023730993270874,
|
|
"train_probe_rewards/frontier_coverage_15": 0.10023730993270874,
|
|
"train_probe_rewards/frontier_coverage_20": 0.10023730993270874,
|
|
"train_probe_rewards/frontier_coverage_25": 0.10023730993270874,
|
|
"train_probe_rewards/frontier_coverage_5": 0.10023730993270874,
|
|
"train_probe_rewards/frontier_ece_reward": 0.023835722357034683,
|
|
"train_probe_runtime": 9.3482,
|
|
"train_probe_samples_per_second": 53.486,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.462158203125,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.48812438547611237,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2310791015625,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2310791015625,
|
|
"train_probe_signal/advantage_abs_mean": 0.2127402350306511,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.2127402350306511,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.23030224442481995,
|
|
"train_probe_signal/advantage_std": 0.23030224442481995,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.22354336827993393,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.28484727442264557,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02794292103499174,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.02794292103499174,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.046661376953125,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05584513582289219,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005832672119140625,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005832672119140625,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0033606411889195442,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.005513262702152133,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.0155478422529995e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.0155478422529995e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30417926609516144,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.42205144464969635,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30417926609516144,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.42205144464969635,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.30417926609516144,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.42205144464969635,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.30417926609516144,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.42205144464969635,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.30417926609516144,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.42205144464969635,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30417926609516144,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.42205144464969635,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005444808630272746,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.03267330303788185,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.040300922468304634,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004084162879735231,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004084162879735231,
|
|
"train_probe_steps_per_second": 0.214
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2704139956053221,
|
|
"calibration/batch_distribution_entropy": 0.9040743573140506,
|
|
"calibration/buffer_distribution_entropy": 0.9399021081106627,
|
|
"calibration/confidence_entropy": 0.40396196903365356,
|
|
"calibration/coverage@0%": 0.0171875,
|
|
"calibration/coverage@1%": 0.0171875,
|
|
"calibration/coverage@10%": 0.18515625,
|
|
"calibration/coverage@15%": 0.44453125,
|
|
"calibration/coverage@20%": 0.53671875,
|
|
"calibration/coverage@25%": 0.58203125,
|
|
"calibration/coverage@30%": 0.65078125,
|
|
"calibration/coverage@5%": 0.10859375,
|
|
"calibration/ece": 0.1906777345961955,
|
|
"calibration/mean_confidence": 0.5680050226904052,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 638.8,
|
|
"completions/max_terminated_length": 410.2,
|
|
"completions/mean_length": 168.918359375,
|
|
"completions/mean_terminated_length": 168.7849609375,
|
|
"completions/min_length": 75.2,
|
|
"completions/min_terminated_length": 75.2,
|
|
"epoch": 0.336,
|
|
"grad_norm": 0.0018242798978462815,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 351303963.0,
|
|
"reward": 1.0275990962982178,
|
|
"reward_std": 0.09201982617378235,
|
|
"rewards/accuracy_reward": 0.58984375,
|
|
"rewards/brier_reward": 0.8076816439628601,
|
|
"rewards/confidence_uniqueness_reward": 0.9373004913330079,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.002289101597853005,
|
|
"rewards/frontier_coverage_1": 0.11363897696137429,
|
|
"rewards/frontier_coverage_10": 0.11363897696137429,
|
|
"rewards/frontier_coverage_15": 0.11363897696137429,
|
|
"rewards/frontier_coverage_20": 0.11363897696137429,
|
|
"rewards/frontier_coverage_25": 0.11384689658880234,
|
|
"rewards/frontier_coverage_5": 0.11363897696137429,
|
|
"rewards/frontier_ece_reward": 0.020657552778720854,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10953369140625,
|
|
"signal/accuracy_reward/group_std_mean": 0.15012021660804747,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054766845703125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.054766845703125,
|
|
"signal/advantage_abs_mean": 0.06871124505996704,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06871124505996704,
|
|
"signal/advantage_pre_scale_std": 0.11801368445158004,
|
|
"signal/advantage_std": 0.11801368445158004,
|
|
"signal/brier_reward/centered_abs_mean": 0.14712486565113067,
|
|
"signal/brier_reward/group_std_mean": 0.1890992045402527,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018390608206391334,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.018390608206391334,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030815805494785308,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.039999409765005114,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038519756868481635,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038519756868481635,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086068242787,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002245573024265468,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0035346172749996184,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.019575717393309e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.019575717393309e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1706451177597046,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.22382004261016847,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1706451177597046,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.22382004261016847,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1706451177597046,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.22382004261016847,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1706451177597046,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.22382004261016847,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.16888906955718994,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22164588570594787,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030231142416596414,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030231142416596414,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1706451177597046,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.22382004261016847,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030545474495738746,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.02038377448916435,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.025580647960305215,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002547971811145544,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002547971811145544,
|
|
"step": 105
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24142044817482367,
|
|
"calibration/batch_distribution_entropy": 0.8656001475514318,
|
|
"calibration/buffer_distribution_entropy": 0.9409112708199789,
|
|
"calibration/confidence_entropy": 0.3802200750041355,
|
|
"calibration/coverage@0%": 0.04068321078431373,
|
|
"calibration/coverage@1%": 0.04068321078431373,
|
|
"calibration/coverage@10%": 0.2774356617647059,
|
|
"calibration/coverage@15%": 0.40088541666666666,
|
|
"calibration/coverage@20%": 0.49231617647058823,
|
|
"calibration/coverage@25%": 0.6063909313725491,
|
|
"calibration/coverage@30%": 0.6743841911764706,
|
|
"calibration/coverage@5%": 0.17272365196078432,
|
|
"calibration/ece": 0.08609408255888487,
|
|
"calibration/mean_confidence": 0.5179847446127612,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 821.2,
|
|
"completions/max_terminated_length": 582.6,
|
|
"completions/mean_length": 169.8603515625,
|
|
"completions/mean_terminated_length": 169.72701416015624,
|
|
"completions/min_length": 78.2,
|
|
"completions/min_terminated_length": 78.2,
|
|
"epoch": 0.352,
|
|
"grad_norm": 0.002221801085397601,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 368303749.0,
|
|
"reward": 0.9986960291862488,
|
|
"reward_std": 0.0819695919752121,
|
|
"rewards/accuracy_reward": 0.52705078125,
|
|
"rewards/brier_reward": 0.7985443472862244,
|
|
"rewards/confidence_uniqueness_reward": 0.9362337708473205,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0025407387875020504,
|
|
"rewards/frontier_coverage_1": 0.15299645960330963,
|
|
"rewards/frontier_coverage_10": 0.15299645960330963,
|
|
"rewards/frontier_coverage_15": 0.15299645960330963,
|
|
"rewards/frontier_coverage_20": 0.15299645960330963,
|
|
"rewards/frontier_coverage_25": 0.14608888924121857,
|
|
"rewards/frontier_coverage_5": 0.15299645960330963,
|
|
"rewards/frontier_ece_reward": 0.017266629636287688,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.095184326171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.1254624456167221,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0475921630859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0475921630859375,
|
|
"signal/advantage_abs_mean": 0.06207955777645111,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06207955777645111,
|
|
"signal/advantage_pre_scale_std": 0.10653006732463836,
|
|
"signal/advantage_std": 0.10653006732463836,
|
|
"signal/brier_reward/centered_abs_mean": 0.1445058435201645,
|
|
"signal/brier_reward/group_std_mean": 0.1861796945333481,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01806323044002056,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01806323044002056,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030342183634638788,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0386995404958725,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037927729543298485,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037927729543298485,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001989635010249913,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0031029653735458853,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.561446574167349e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.561446574167349e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17928497791290282,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23090406954288484,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17928497791290282,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23090406954288484,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17928497791290282,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23090406954288484,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17928497791290282,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23090406954288484,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.17467791438102723,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.22531512677669524,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031267345417290925,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031267345417290925,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17928497791290282,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23090406954288484,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032092009671032427,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.018327732756733894,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02301064059138298,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022909665945917367,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022909665945917367,
|
|
"step": 110
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25086900973025095,
|
|
"calibration/batch_distribution_entropy": 0.9186404347150997,
|
|
"calibration/buffer_distribution_entropy": 0.9416779594893804,
|
|
"calibration/confidence_entropy": 0.4035477266164838,
|
|
"calibration/coverage@0%": 0.03438112745098039,
|
|
"calibration/coverage@1%": 0.03438112745098039,
|
|
"calibration/coverage@10%": 0.1820373774509804,
|
|
"calibration/coverage@15%": 0.21797487745098038,
|
|
"calibration/coverage@20%": 0.4117984068627451,
|
|
"calibration/coverage@25%": 0.5314705882352941,
|
|
"calibration/coverage@30%": 0.6292800245098039,
|
|
"calibration/coverage@5%": 0.16172487745098038,
|
|
"calibration/ece": 0.15323516031703535,
|
|
"calibration/mean_confidence": 0.5679697731010848,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 904.2,
|
|
"completions/max_terminated_length": 493.4,
|
|
"completions/mean_length": 169.8033203125,
|
|
"completions/mean_terminated_length": 169.5369659423828,
|
|
"completions/min_length": 76.8,
|
|
"completions/min_terminated_length": 76.8,
|
|
"epoch": 0.368,
|
|
"grad_norm": 0.0022946952376514673,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 385108007.0,
|
|
"reward": 1.0292543292045593,
|
|
"reward_std": 0.0754001870751381,
|
|
"rewards/accuracy_reward": 0.58623046875,
|
|
"rewards/brier_reward": 0.8222095847129822,
|
|
"rewards/confidence_uniqueness_reward": 0.9371579766273499,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0019215317443013191,
|
|
"rewards/frontier_coverage_1": 0.13132742196321487,
|
|
"rewards/frontier_coverage_10": 0.13132742196321487,
|
|
"rewards/frontier_coverage_15": 0.13132742196321487,
|
|
"rewards/frontier_coverage_20": 0.13132742196321487,
|
|
"rewards/frontier_coverage_25": 0.12279371917247772,
|
|
"rewards/frontier_coverage_5": 0.13132742196321487,
|
|
"rewards/frontier_ece_reward": 0.01957782618701458,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084307861328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11866024732589722,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421539306640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0421539306640625,
|
|
"signal/advantage_abs_mean": 0.05573421791195869,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05573421791195869,
|
|
"signal/advantage_pre_scale_std": 0.09996391981840133,
|
|
"signal/advantage_std": 0.09996391981840133,
|
|
"signal/brier_reward/centered_abs_mean": 0.131490296125412,
|
|
"signal/brier_reward/group_std_mean": 0.17088458240032195,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0164362870156765,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.0164362870156765,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02735428810119629,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.034791599959135056,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003419286012649536,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003419286012649536,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017890902236104012,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.00294273984618485,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2024714892031624e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2024714892031624e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.16438928842544556,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21578606963157654,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.16438928842544556,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21578606963157654,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.16438928842544556,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21578606963157654,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.16438928842544556,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21578606963157654,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.15546331703662872,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.2039874643087387,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027827932965010403,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027827932965010403,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.16438928842544556,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21578606963157654,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002942568203434348,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.01668607220053673,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.02099420689046383,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002085759025067091,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002085759025067091,
|
|
"step": 115
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2703069251334617,
|
|
"calibration/batch_distribution_entropy": 0.8990586414219536,
|
|
"calibration/buffer_distribution_entropy": 0.9422862701184694,
|
|
"calibration/confidence_entropy": 0.4024924860193864,
|
|
"calibration/coverage@0%": 0.04765625,
|
|
"calibration/coverage@1%": 0.04765625,
|
|
"calibration/coverage@10%": 0.38125,
|
|
"calibration/coverage@15%": 0.425,
|
|
"calibration/coverage@20%": 0.4703125,
|
|
"calibration/coverage@25%": 0.515625,
|
|
"calibration/coverage@30%": 0.55546875,
|
|
"calibration/coverage@5%": 0.23828125,
|
|
"calibration/ece": 0.1594206011865718,
|
|
"calibration/mean_confidence": 0.5080374241118275,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 729.8,
|
|
"completions/max_terminated_length": 581.2,
|
|
"completions/mean_length": 171.50654296875,
|
|
"completions/mean_terminated_length": 171.239794921875,
|
|
"completions/min_length": 76.6,
|
|
"completions/min_terminated_length": 76.6,
|
|
"epoch": 0.384,
|
|
"grad_norm": 0.0017057686345651746,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 401720746.0,
|
|
"reward": 1.030127477645874,
|
|
"reward_std": 0.08005195558071136,
|
|
"rewards/accuracy_reward": 0.5873046875,
|
|
"rewards/brier_reward": 0.8276395082473755,
|
|
"rewards/confidence_uniqueness_reward": 0.9362314462661743,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.001922252168878913,
|
|
"rewards/frontier_coverage_1": 0.13151057362556456,
|
|
"rewards/frontier_coverage_10": 0.13151057362556456,
|
|
"rewards/frontier_coverage_15": 0.13151057362556456,
|
|
"rewards/frontier_coverage_20": 0.13151057362556456,
|
|
"rewards/frontier_coverage_25": 0.11768633276224136,
|
|
"rewards/frontier_coverage_5": 0.13151057362556456,
|
|
"rewards/frontier_ece_reward": 0.019144237600266935,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09979248046875,
|
|
"signal/accuracy_reward/group_std_mean": 0.13565291166305543,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049896240234375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049896240234375,
|
|
"signal/advantage_abs_mean": 0.058755910396575926,
|
|
"signal/advantage_pre_scale_abs_mean": 0.058755910396575926,
|
|
"signal/advantage_pre_scale_std": 0.10700914263725281,
|
|
"signal/advantage_std": 0.10700914263725281,
|
|
"signal/brier_reward/centered_abs_mean": 0.12325199693441391,
|
|
"signal/brier_reward/group_std_mean": 0.16149061620235444,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015406499616801739,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015406499616801739,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027864859998226167,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.036340619623661044,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003483107499778271,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003483107499778271,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.002762135770171881,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017306852154433728,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002775628166273236,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.097926237387583e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.097926237387583e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15827414095401765,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.20665526986122132,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15827414095401765,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.20665526986122132,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15827414095401765,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.20665526986122132,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15827414095401765,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.20665526986122132,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13856834620237352,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.1815927118062973,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024803733453154565,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024803733453154565,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15827414095401765,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.20665526986122132,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002833107067272067,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015285241603851318,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.019187380746006965,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019106552004814147,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019106552004814147,
|
|
"step": 120
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29008772776734093,
|
|
"calibration/batch_distribution_entropy": 0.9157935092331382,
|
|
"calibration/buffer_distribution_entropy": 0.9443153619361073,
|
|
"calibration/confidence_entropy": 0.4131123080990579,
|
|
"calibration/coverage@0%": 0.017981004901960786,
|
|
"calibration/coverage@1%": 0.017981004901960786,
|
|
"calibration/coverage@10%": 0.043762254901960784,
|
|
"calibration/coverage@15%": 0.14454350490196077,
|
|
"calibration/coverage@20%": 0.28598345588235297,
|
|
"calibration/coverage@25%": 0.46881740196078436,
|
|
"calibration/coverage@30%": 0.6245557598039216,
|
|
"calibration/coverage@5%": 0.017981004901960786,
|
|
"calibration/ece": 0.16912397182889852,
|
|
"calibration/mean_confidence": 0.5452109849824474,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 633.8,
|
|
"completions/max_terminated_length": 433.0,
|
|
"completions/mean_length": 174.31162109375,
|
|
"completions/mean_terminated_length": 174.17910766601562,
|
|
"completions/min_length": 85.2,
|
|
"completions/min_terminated_length": 85.2,
|
|
"epoch": 0.4,
|
|
"grad_norm": 0.0025587843265384436,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 418542145.0,
|
|
"reward": 1.0214404821395875,
|
|
"reward_std": 0.084488844871521,
|
|
"rewards/accuracy_reward": 0.583984375,
|
|
"rewards/brier_reward": 0.7968339323997498,
|
|
"rewards/confidence_uniqueness_reward": 0.936758029460907,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.0024314658250659702,
|
|
"rewards/frontier_coverage_1": 0.10493959616869689,
|
|
"rewards/frontier_coverage_10": 0.10493959616869689,
|
|
"rewards/frontier_coverage_15": 0.10493959616869689,
|
|
"rewards/frontier_coverage_20": 0.10493959616869689,
|
|
"rewards/frontier_coverage_25": 0.0941769102588296,
|
|
"rewards/frontier_coverage_5": 0.10493959616869689,
|
|
"rewards/frontier_ece_reward": 0.01528221946209669,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.103076171875,
|
|
"signal/accuracy_reward/group_std_mean": 0.14248399436473846,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.56875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515380859375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0515380859375,
|
|
"signal/advantage_abs_mean": 0.06206804737448692,
|
|
"signal/advantage_pre_scale_abs_mean": 0.06206804737448692,
|
|
"signal/advantage_pre_scale_std": 0.11141373813152314,
|
|
"signal/advantage_std": 0.11141373813152314,
|
|
"signal/brier_reward/centered_abs_mean": 0.1348109632730484,
|
|
"signal/brier_reward/group_std_mean": 0.1749127984046936,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01685137040913105,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01685137040913105,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02733922004699707,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.035421935841441154,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003417402505874634,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003417402505874634,
|
|
"signal/format_reward/centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/group_std_mean": 0.0022097086068242787,
|
|
"signal/format_reward/group_zero_std_frac": 0.9875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002029798785224557,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0032225903123617172,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.633339802036062e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.633339802036062e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15750395655632018,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.2061130702495575,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15750395655632018,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.2061130702495575,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15750395655632018,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.2061130702495575,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15750395655632018,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.2061130702495575,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.12791687697172166,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.16793505549430848,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022897121030837297,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022897121030837297,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15750395655632018,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.2061130702495575,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002819320699200034,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.015673490427434444,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01953093260526657,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019591863034293055,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019591863034293055,
|
|
"step": 125
|
|
},
|
|
{
|
|
"calibration/aurc": 0.300496468657274,
|
|
"calibration/batch_distribution_entropy": 0.860343677952838,
|
|
"calibration/buffer_distribution_entropy": 0.9456375276771343,
|
|
"calibration/confidence_entropy": 0.4292367542687396,
|
|
"calibration/coverage@0%": 0.04453125,
|
|
"calibration/coverage@1%": 0.04453125,
|
|
"calibration/coverage@10%": 0.3421875,
|
|
"calibration/coverage@15%": 0.47421875,
|
|
"calibration/coverage@20%": 0.56484375,
|
|
"calibration/coverage@25%": 0.61171875,
|
|
"calibration/coverage@30%": 0.61875,
|
|
"calibration/coverage@5%": 0.13828125,
|
|
"calibration/ece": 0.1725572010194605,
|
|
"calibration/mean_confidence": 0.5447724534313727,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 661.0,
|
|
"completions/max_terminated_length": 436.8,
|
|
"completions/mean_length": 177.3251953125,
|
|
"completions/mean_terminated_length": 177.19320983886718,
|
|
"completions/min_length": 80.6,
|
|
"completions/min_terminated_length": 80.6,
|
|
"epoch": 0.416,
|
|
"grad_norm": 0.0018295373301953077,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 435239139.0,
|
|
"reward": 1.0142476677894592,
|
|
"reward_std": 0.07882001847028733,
|
|
"rewards/accuracy_reward": 0.5599609375,
|
|
"rewards/brier_reward": 0.8115284204483032,
|
|
"rewards/confidence_uniqueness_reward": 0.9416091442108154,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.001915666786953807,
|
|
"rewards/frontier_coverage_1": 0.12817499786615372,
|
|
"rewards/frontier_coverage_10": 0.12817499786615372,
|
|
"rewards/frontier_coverage_15": 0.12817499786615372,
|
|
"rewards/frontier_coverage_20": 0.12817499786615372,
|
|
"rewards/frontier_coverage_25": 0.11044178158044815,
|
|
"rewards/frontier_coverage_5": 0.12817499786615372,
|
|
"rewards/frontier_ece_reward": 0.014857827685773373,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0997314453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13128983080387116,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04986572265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04986572265625,
|
|
"signal/advantage_abs_mean": 0.05995083674788475,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05995083674788475,
|
|
"signal/advantage_pre_scale_std": 0.10547690689563752,
|
|
"signal/advantage_std": 0.10547690689563752,
|
|
"signal/brier_reward/centered_abs_mean": 0.12911611646413804,
|
|
"signal/brier_reward/group_std_mean": 0.16568702459335327,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016139514558017255,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.016139514558017255,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02463034950196743,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.031486156210303304,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030787936877459286,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030787936877459286,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014531841035932303,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002290627988986671,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6011993395513854e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6011993395513854e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.17003713846206664,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.21856652796268464,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.17003713846206664,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.21856652796268464,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.17003713846206664,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.21856652796268464,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.17003713846206664,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.21856652796268464,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1356060341000557,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.17483413219451904,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002427347889170051,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002427347889170051,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.17003713846206664,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.21856652796268464,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030436647590249776,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013920800760388374,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.017522389814257622,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017401000950485468,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017401000950485468,
|
|
"step": 130
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1721532756826449,
|
|
"calibration/batch_distribution_entropy": 0.8766754582035976,
|
|
"calibration/buffer_distribution_entropy": 0.9459665615870213,
|
|
"calibration/confidence_entropy": 0.3974590959603793,
|
|
"calibration/coverage@0%": 0.10237132352941176,
|
|
"calibration/coverage@1%": 0.16409007352941177,
|
|
"calibration/coverage@10%": 0.5047549019607842,
|
|
"calibration/coverage@15%": 0.5892555147058823,
|
|
"calibration/coverage@20%": 0.6220955882352941,
|
|
"calibration/coverage@25%": 0.6886182598039217,
|
|
"calibration/coverage@30%": 0.7379197303921569,
|
|
"calibration/coverage@5%": 0.44065257352941173,
|
|
"calibration/ece": 0.15323011827988556,
|
|
"calibration/mean_confidence": 0.6029602178207593,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 753.0,
|
|
"completions/max_terminated_length": 553.2,
|
|
"completions/mean_length": 176.35029296875,
|
|
"completions/mean_terminated_length": 176.21759643554688,
|
|
"completions/min_length": 84.8,
|
|
"completions/min_terminated_length": 84.8,
|
|
"epoch": 0.432,
|
|
"grad_norm": 0.0018569445237517357,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 452059302.0,
|
|
"reward": 1.0361051321029664,
|
|
"reward_std": 0.07464597374200821,
|
|
"rewards/accuracy_reward": 0.598828125,
|
|
"rewards/brier_reward": 0.8299178600311279,
|
|
"rewards/confidence_uniqueness_reward": 0.9411059260368347,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.001714168442413211,
|
|
"rewards/frontier_coverage_1": 0.12789682820439338,
|
|
"rewards/frontier_coverage_10": 0.12789682820439338,
|
|
"rewards/frontier_coverage_15": 0.12789682820439338,
|
|
"rewards/frontier_coverage_20": 0.12789682820439338,
|
|
"rewards/frontier_coverage_25": 0.1091009445488453,
|
|
"rewards/frontier_coverage_5": 0.12789682820439338,
|
|
"rewards/frontier_ece_reward": 0.016333967633545398,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09854736328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12864942103624344,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049273681640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049273681640625,
|
|
"signal/advantage_abs_mean": 0.05766047313809395,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05766047313809395,
|
|
"signal/advantage_pre_scale_std": 0.10433387905359268,
|
|
"signal/advantage_std": 0.10433387905359268,
|
|
"signal/brier_reward/centered_abs_mean": 0.11633200347423553,
|
|
"signal/brier_reward/group_std_mean": 0.14996844828128814,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014541500434279441,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014541500434279441,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025572020933032034,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03261452466249466,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031965026166290043,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031965026166290043,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014799919212237001,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023754774127155544,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6491854441701435e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6491854441701435e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15234991312026977,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19905296862125396,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15234991312026977,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19905296862125396,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15234991312026977,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19905296862125396,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15234991312026977,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19905296862125396,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.1211901381611824,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15898216962814332,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021693034097552298,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021693034097552298,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15234991312026977,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19905296862125396,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002727063372731209,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012844923511147499,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01613148283213377,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016056154388934373,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016056154388934373,
|
|
"step": 135
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25730294087394334,
|
|
"calibration/batch_distribution_entropy": 0.8953334895238516,
|
|
"calibration/buffer_distribution_entropy": 0.9452347981622419,
|
|
"calibration/confidence_entropy": 0.40368902257402794,
|
|
"calibration/coverage@0%": 0.00625,
|
|
"calibration/coverage@1%": 0.00625,
|
|
"calibration/coverage@10%": 0.2546875,
|
|
"calibration/coverage@15%": 0.32578125,
|
|
"calibration/coverage@20%": 0.4125,
|
|
"calibration/coverage@25%": 0.4640625,
|
|
"calibration/coverage@30%": 0.67890625,
|
|
"calibration/coverage@5%": 0.02109375,
|
|
"calibration/ece": 0.1689719891195878,
|
|
"calibration/mean_confidence": 0.6208841497585194,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 462.2,
|
|
"completions/max_terminated_length": 462.2,
|
|
"completions/mean_length": 182.95234375,
|
|
"completions/mean_terminated_length": 182.95234375,
|
|
"completions/min_length": 85.8,
|
|
"completions/min_terminated_length": 85.8,
|
|
"epoch": 0.448,
|
|
"grad_norm": 0.0040541719645261765,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 468885534.0,
|
|
"reward": 1.0192960262298585,
|
|
"reward_std": 0.07691188901662827,
|
|
"rewards/accuracy_reward": 0.56416015625,
|
|
"rewards/brier_reward": 0.8227449178695678,
|
|
"rewards/confidence_uniqueness_reward": 0.9410862445831298,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0022552535170689224,
|
|
"rewards/frontier_coverage_1": 0.14381106197834015,
|
|
"rewards/frontier_coverage_10": 0.14381106197834015,
|
|
"rewards/frontier_coverage_15": 0.14381106197834015,
|
|
"rewards/frontier_coverage_20": 0.14381106197834015,
|
|
"rewards/frontier_coverage_25": 0.12079337984323502,
|
|
"rewards/frontier_coverage_5": 0.14381106197834015,
|
|
"rewards/frontier_ece_reward": 0.014734631776809693,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.091912841796875,
|
|
"signal/accuracy_reward/group_std_mean": 0.12225985080003739,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459564208984375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0459564208984375,
|
|
"signal/advantage_abs_mean": 0.057493841648101805,
|
|
"signal/advantage_pre_scale_abs_mean": 0.057493841648101805,
|
|
"signal/advantage_pre_scale_std": 0.10439873188734054,
|
|
"signal/advantage_std": 0.10439873188734054,
|
|
"signal/brier_reward/centered_abs_mean": 0.1224316492676735,
|
|
"signal/brier_reward/group_std_mean": 0.1602381944656372,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015303956158459187,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.015303956158459187,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026432880386710166,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03415291607379913,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033041100483387708,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033041100483387708,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001860675076022744,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003051386307924986,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.330608233227394e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.330608233227394e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15158057063817978,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19816445112228392,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15158057063817978,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19816445112228392,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15158057063817978,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19816445112228392,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15158057063817978,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19816445112228392,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11687376201152802,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15403735041618347,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020920401671901344,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020920401671901344,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15158057063817978,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19816445112228392,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002713292092084885,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.013030365109443665,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01642268504947424,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001628795638680458,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001628795638680458,
|
|
"step": 140
|
|
},
|
|
{
|
|
"calibration/aurc": 0.31054807919449445,
|
|
"calibration/batch_distribution_entropy": 0.8982943819177818,
|
|
"calibration/buffer_distribution_entropy": 0.9453990493361039,
|
|
"calibration/confidence_entropy": 0.4392308925461128,
|
|
"calibration/coverage@0%": 0.010159313725490195,
|
|
"calibration/coverage@1%": 0.010159313725490195,
|
|
"calibration/coverage@10%": 0.11797181372549019,
|
|
"calibration/coverage@15%": 0.1523468137254902,
|
|
"calibration/coverage@20%": 0.2476593137254902,
|
|
"calibration/coverage@25%": 0.2921905637254902,
|
|
"calibration/coverage@30%": 0.5638878676470588,
|
|
"calibration/coverage@5%": 0.010159313725490195,
|
|
"calibration/ece": 0.16542806795759996,
|
|
"calibration/mean_confidence": 0.6450125473590109,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1155.6,
|
|
"completions/max_terminated_length": 662.2,
|
|
"completions/mean_length": 187.3685546875,
|
|
"completions/mean_terminated_length": 186.973779296875,
|
|
"completions/min_length": 88.8,
|
|
"completions/min_terminated_length": 88.8,
|
|
"epoch": 0.464,
|
|
"grad_norm": 0.0020874959882348776,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 485975004.0,
|
|
"reward": 0.9936848402023315,
|
|
"reward_std": 0.07820483893156052,
|
|
"rewards/accuracy_reward": 0.5205078125,
|
|
"rewards/brier_reward": 0.7968811154365539,
|
|
"rewards/confidence_uniqueness_reward": 0.934678053855896,
|
|
"rewards/format_reward": 0.99921875,
|
|
"rewards/frontier_aurc_reward": -0.0026155672036111354,
|
|
"rewards/frontier_coverage_1": 0.15401808321475982,
|
|
"rewards/frontier_coverage_10": 0.15401808321475982,
|
|
"rewards/frontier_coverage_15": 0.15401808321475982,
|
|
"rewards/frontier_coverage_20": 0.15401808321475982,
|
|
"rewards/frontier_coverage_25": 0.12326906770467758,
|
|
"rewards/frontier_coverage_5": 0.15401808321475982,
|
|
"rewards/frontier_ece_reward": 0.011458772234618664,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084423828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11666271984577178,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422119140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422119140625,
|
|
"signal/advantage_abs_mean": 0.05724867507815361,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05724867507815361,
|
|
"signal/advantage_pre_scale_std": 0.10630969554185868,
|
|
"signal/advantage_std": 0.10630969554185868,
|
|
"signal/brier_reward/centered_abs_mean": 0.12861161679029465,
|
|
"signal/brier_reward/group_std_mean": 0.16630764305591583,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01607645209878683,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01607645209878683,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028835254535079003,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03829977139830589,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036044068168848754,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036044068168848754,
|
|
"signal/format_reward/centered_abs_mean": 0.001513671875,
|
|
"signal/format_reward/group_std_mean": 0.004419417260214687,
|
|
"signal/format_reward/group_zero_std_frac": 0.975,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019000403117388487,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030668860767036677,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.40107213560259e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.40107213560259e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14896406829357148,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19503563046455383,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14896406829357148,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19503563046455383,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14896406829357148,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19503563046455383,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14896406829357148,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19503563046455383,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.11530720740556717,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.15157280564308168,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020639989525079727,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020639989525079727,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14896406829357148,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19503563046455383,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002666456811130047,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012446103803813457,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.015705187618732453,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001555762975476682,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001555762975476682,
|
|
"step": 145
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24557281662898522,
|
|
"calibration/batch_distribution_entropy": 0.8341399106383033,
|
|
"calibration/buffer_distribution_entropy": 0.9449403760979717,
|
|
"calibration/confidence_entropy": 0.3687400847573974,
|
|
"calibration/coverage@0%": 0.065625,
|
|
"calibration/coverage@1%": 0.065625,
|
|
"calibration/coverage@10%": 0.22734375,
|
|
"calibration/coverage@15%": 0.3375,
|
|
"calibration/coverage@20%": 0.446875,
|
|
"calibration/coverage@25%": 0.56953125,
|
|
"calibration/coverage@30%": 0.74375,
|
|
"calibration/coverage@5%": 0.14296875,
|
|
"calibration/ece": 0.11169667756470028,
|
|
"calibration/mean_confidence": 0.5292462022999398,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 911.8,
|
|
"completions/max_terminated_length": 479.0,
|
|
"completions/mean_length": 183.63916015625,
|
|
"completions/mean_terminated_length": 183.241748046875,
|
|
"completions/min_length": 88.6,
|
|
"completions/min_terminated_length": 88.6,
|
|
"epoch": 0.48,
|
|
"grad_norm": 0.0024119976442307234,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0014,
|
|
"num_tokens": 502903501.0,
|
|
"reward": 1.0209609508514403,
|
|
"reward_std": 0.08060411512851715,
|
|
"rewards/accuracy_reward": 0.57626953125,
|
|
"rewards/brier_reward": 0.8086855053901673,
|
|
"rewards/confidence_uniqueness_reward": 0.9317057371139527,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0018240779172629118,
|
|
"rewards/frontier_coverage_1": 0.1329729899764061,
|
|
"rewards/frontier_coverage_10": 0.1329729899764061,
|
|
"rewards/frontier_coverage_15": 0.1329729899764061,
|
|
"rewards/frontier_coverage_20": 0.1329729899764061,
|
|
"rewards/frontier_coverage_25": 0.10789064913988114,
|
|
"rewards/frontier_coverage_5": 0.1329729899764061,
|
|
"rewards/frontier_ece_reward": 0.012992727011442185,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.114581298828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1539652705192566,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.55,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0572906494140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0572906494140625,
|
|
"signal/advantage_abs_mean": 0.060080311447381976,
|
|
"signal/advantage_pre_scale_abs_mean": 0.060080311447381976,
|
|
"signal/advantage_pre_scale_std": 0.10676742047071457,
|
|
"signal/advantage_std": 0.10676742047071457,
|
|
"signal/brier_reward/centered_abs_mean": 0.13224513232707977,
|
|
"signal/brier_reward/group_std_mean": 0.16912654638290406,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01653064154088497,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01653064154088497,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030292441695928575,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0388321079313755,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003786555211991072,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003786555211991072,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013736919732764362,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021870420314371586,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.458908493281342e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.458908493281342e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.18107914328575134,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.23383658230304719,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.18107914328575134,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.23383658230304719,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.18107914328575134,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.23383658230304719,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.18107914328575134,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.23383658230304719,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.13092263638973237,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.170234015583992,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002343515120446682,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002343515120446682,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.18107914328575134,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.23383658230304719,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003241316508501768,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.012281083315610886,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.015359072759747506,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015351354144513608,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015351354144513608,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"eval_calibration/aurc": 0.406549383364957,
|
|
"eval_calibration/batch_distribution_entropy": 0.8909663454672565,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9446340496301424,
|
|
"eval_calibration/confidence_entropy": 0.46453067903100087,
|
|
"eval_calibration/coverage@0%": 0.078125,
|
|
"eval_calibration/coverage@1%": 0.078125,
|
|
"eval_calibration/coverage@10%": 0.078125,
|
|
"eval_calibration/coverage@15%": 0.109375,
|
|
"eval_calibration/coverage@20%": 0.109375,
|
|
"eval_calibration/coverage@25%": 0.140625,
|
|
"eval_calibration/coverage@30%": 0.21875,
|
|
"eval_calibration/coverage@5%": 0.078125,
|
|
"eval_calibration/ece": 0.1869676292547091,
|
|
"eval_calibration/mean_confidence": 0.5198574552595667,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 451.5,
|
|
"eval_completions/max_terminated_length": 451.5,
|
|
"eval_completions/mean_length": 184.18531799316406,
|
|
"eval_completions/mean_terminated_length": 184.18531799316406,
|
|
"eval_completions/min_length": 94.5,
|
|
"eval_completions/min_terminated_length": 94.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 502903501.0,
|
|
"eval_reward": 0.9559306502342224,
|
|
"eval_reward_std": 0.22658731788396835,
|
|
"eval_rewards/accuracy_reward": 0.443359375,
|
|
"eval_rewards/brier_reward": 0.7953170835971832,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.896484375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.0025530497077852488,
|
|
"eval_rewards/frontier_coverage_1": 0.20948684215545654,
|
|
"eval_rewards/frontier_coverage_10": 0.20948684215545654,
|
|
"eval_rewards/frontier_coverage_15": 0.20948684215545654,
|
|
"eval_rewards/frontier_coverage_20": 0.20948684215545654,
|
|
"eval_rewards/frontier_coverage_25": 0.1534598395228386,
|
|
"eval_rewards/frontier_coverage_5": 0.20948684215545654,
|
|
"eval_rewards/frontier_ece_reward": 0.010603584349155426,
|
|
"eval_runtime": 11.305,
|
|
"eval_samples_per_second": 44.228,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4813232421875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49823255836963654,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.24066162109375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.24066162109375,
|
|
"eval_signal/advantage_abs_mean": 0.20942430198192596,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.20942430198192596,
|
|
"eval_signal/advantage_pre_scale_std": 0.223799467086792,
|
|
"eval_signal/advantage_std": 0.223799467086792,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.21622556447982788,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2670576274394989,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027028195559978485,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027028195559978485,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0438079833984375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05144248157739639,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054759979248046875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054759979248046875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002718214178457856,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004650075454264879,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.865603295911569e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.865603295911569e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3949373662471771,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4775615483522415,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3949373662471771,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4775615483522415,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3949373662471771,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.4775615483522415,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3949373662471771,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.4775615483522415,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2780953347682953,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.34059378504753113,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004977906821295619,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004977906821295619,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3949373662471771,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4775615483522415,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00706937862560153,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.017484422773122787,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.021346506662666798,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021855528466403484,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021855528466403484,
|
|
"eval_steps_per_second": 0.177,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"step": 150,
|
|
"train_probe_calibration/aurc": 0.15857725846094667,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.9130259925773516,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.944713675838337,
|
|
"train_probe_calibration/confidence_entropy": 0.4207142744029133,
|
|
"train_probe_calibration/coverage@0%": 0.125,
|
|
"train_probe_calibration/coverage@1%": 0.125,
|
|
"train_probe_calibration/coverage@10%": 0.5,
|
|
"train_probe_calibration/coverage@15%": 0.65625,
|
|
"train_probe_calibration/coverage@20%": 0.75,
|
|
"train_probe_calibration/coverage@25%": 0.828125,
|
|
"train_probe_calibration/coverage@30%": 0.875,
|
|
"train_probe_calibration/coverage@5%": 0.390625,
|
|
"train_probe_calibration/ece": 0.20172079164661832,
|
|
"train_probe_calibration/mean_confidence": 0.5198955010753575,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 321.5,
|
|
"train_probe_completions/max_terminated_length": 321.5,
|
|
"train_probe_completions/mean_length": 178.07261657714844,
|
|
"train_probe_completions/mean_terminated_length": 178.07261657714844,
|
|
"train_probe_completions/min_length": 96.0,
|
|
"train_probe_completions/min_terminated_length": 96.0,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 502903501.0,
|
|
"train_probe_reward": 1.0524759888648987,
|
|
"train_probe_reward_std": 0.20973487198352814,
|
|
"train_probe_rewards/accuracy_reward": 0.654296875,
|
|
"train_probe_rewards/brier_reward": 0.8249536752700806,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.891845703125,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0013824773486703634,
|
|
"train_probe_rewards/frontier_coverage_1": 0.0867544673383236,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0867544673383236,
|
|
"train_probe_rewards/frontier_coverage_15": 0.0867544673383236,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0867544673383236,
|
|
"train_probe_rewards/frontier_coverage_25": 0.06929008662700653,
|
|
"train_probe_rewards/frontier_coverage_5": 0.0867544673383236,
|
|
"train_probe_rewards/frontier_ece_reward": 0.01398058095946908,
|
|
"train_probe_runtime": 8.5257,
|
|
"train_probe_samples_per_second": 58.646,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4410400390625,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.4765031486749649,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22052001953125,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22052001953125,
|
|
"train_probe_signal/advantage_abs_mean": 0.18870525062084198,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.18870525062084198,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.20722128450870514,
|
|
"train_probe_signal/advantage_std": 0.20722128450870514,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.1941361352801323,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.2597276568412781,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024267016910016537,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.024267016910016537,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047119140625,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05483095906674862,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005889892578125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005889892578125,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0020549558103084564,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0037329471670091152,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6783709219889715e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6783709219889715e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3552343100309372,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4747858941555023,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3552343100309372,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.4747858941555023,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3552343100309372,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.4747858941555023,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3552343100309372,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.4747858941555023,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.24588338285684586,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.33837637305259705,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004401312442496419,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004401312442496419,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3552343100309372,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4747858941555023,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006358693819493055,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.017886138521134853,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.021670137532055378,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022357673151418567,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022357673151418567,
|
|
"train_probe_steps_per_second": 0.235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.36937569815613946,
|
|
"calibration/batch_distribution_entropy": 0.8804409015073205,
|
|
"calibration/buffer_distribution_entropy": 0.9450503390407532,
|
|
"calibration/confidence_entropy": 0.405562550806981,
|
|
"calibration/coverage@0%": 0.078125,
|
|
"calibration/coverage@1%": 0.078125,
|
|
"calibration/coverage@10%": 0.14140625,
|
|
"calibration/coverage@15%": 0.15390625,
|
|
"calibration/coverage@20%": 0.18828125,
|
|
"calibration/coverage@25%": 0.3640625,
|
|
"calibration/coverage@30%": 0.44765625,
|
|
"calibration/coverage@5%": 0.1078125,
|
|
"calibration/ece": 0.15152305063456092,
|
|
"calibration/mean_confidence": 0.5187107282621319,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 678.6,
|
|
"completions/max_terminated_length": 502.6,
|
|
"completions/mean_length": 184.14736328125,
|
|
"completions/mean_terminated_length": 184.01534423828124,
|
|
"completions/min_length": 83.6,
|
|
"completions/min_terminated_length": 83.6,
|
|
"epoch": 0.496,
|
|
"grad_norm": 0.005962767172604799,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 520097010.0,
|
|
"reward": 1.044260597229004,
|
|
"reward_std": 0.0687633216381073,
|
|
"rewards/accuracy_reward": 0.6216796875,
|
|
"rewards/brier_reward": 0.8228810787200928,
|
|
"rewards/confidence_uniqueness_reward": 0.942884886264801,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0015023096697404982,
|
|
"rewards/frontier_coverage_1": 0.1085168793797493,
|
|
"rewards/frontier_coverage_10": 0.1085168793797493,
|
|
"rewards/frontier_coverage_15": 0.1085168793797493,
|
|
"rewards/frontier_coverage_20": 0.1085168793797493,
|
|
"rewards/frontier_coverage_25": 0.08267375081777573,
|
|
"rewards/frontier_coverage_5": 0.1085168793797493,
|
|
"rewards/frontier_ece_reward": 0.01266906913369894,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08406982421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.11525466293096542,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042034912109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042034912109375,
|
|
"signal/advantage_abs_mean": 0.051340526342391966,
|
|
"signal/advantage_pre_scale_abs_mean": 0.051340526342391966,
|
|
"signal/advantage_pre_scale_std": 0.0964614674448967,
|
|
"signal/advantage_std": 0.0964614674448967,
|
|
"signal/brier_reward/centered_abs_mean": 0.11293443143367768,
|
|
"signal/brier_reward/group_std_mean": 0.1477597326040268,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01411680392920971,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01411680392920971,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02416303977370262,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030411677807569502,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030203799717128275,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030203799717128275,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001119971019215882,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0018046426121145487,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0047481302754023e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0047481302754023e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1459894895553589,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1935875177383423,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1459894895553589,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1935875177383423,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.1459894895553589,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1935875177383423,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1459894895553589,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1935875177383423,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.101905357837677,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.135857430100441,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018241058802232145,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018241058802232145,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1459894895553589,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1935875177383423,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026132117491215467,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.011225111037492751,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.014030049927532673,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001403138879686594,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001403138879686594,
|
|
"step": 155
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19729564683186535,
|
|
"calibration/batch_distribution_entropy": 0.9365230444547216,
|
|
"calibration/buffer_distribution_entropy": 0.9458546354861728,
|
|
"calibration/confidence_entropy": 0.4251162815208058,
|
|
"calibration/coverage@0%": 0.05625,
|
|
"calibration/coverage@1%": 0.05625,
|
|
"calibration/coverage@10%": 0.409375,
|
|
"calibration/coverage@15%": 0.459375,
|
|
"calibration/coverage@20%": 0.559375,
|
|
"calibration/coverage@25%": 0.634375,
|
|
"calibration/coverage@30%": 0.828125,
|
|
"calibration/coverage@5%": 0.1625,
|
|
"calibration/ece": 0.1667415023265532,
|
|
"calibration/mean_confidence": 0.4842607593122642,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 893.0,
|
|
"completions/max_terminated_length": 456.8,
|
|
"completions/mean_length": 183.923828125,
|
|
"completions/mean_terminated_length": 183.6598693847656,
|
|
"completions/min_length": 90.2,
|
|
"completions/min_terminated_length": 90.2,
|
|
"epoch": 0.512,
|
|
"grad_norm": 0.0023144185543060303,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 537126054.0,
|
|
"reward": 1.0360616207122804,
|
|
"reward_std": 0.07599924206733703,
|
|
"rewards/accuracy_reward": 0.6048828125,
|
|
"rewards/brier_reward": 0.8223283767700196,
|
|
"rewards/confidence_uniqueness_reward": 0.9420121669769287,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.001600394258275628,
|
|
"rewards/frontier_coverage_1": 0.1132353588938713,
|
|
"rewards/frontier_coverage_10": 0.1132353588938713,
|
|
"rewards/frontier_coverage_15": 0.1132353588938713,
|
|
"rewards/frontier_coverage_20": 0.1132353588938713,
|
|
"rewards/frontier_coverage_25": 0.08508779406547547,
|
|
"rewards/frontier_coverage_5": 0.1132353588938713,
|
|
"rewards/frontier_ece_reward": 0.012370448373258115,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0976318359375,
|
|
"signal/accuracy_reward/group_std_mean": 0.13105546683073044,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04881591796875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04881591796875,
|
|
"signal/advantage_abs_mean": 0.05740831717848778,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05740831717848778,
|
|
"signal/advantage_pre_scale_std": 0.10611572861671448,
|
|
"signal/advantage_std": 0.10611572861671448,
|
|
"signal/brier_reward/centered_abs_mean": 0.1118384689092636,
|
|
"signal/brier_reward/group_std_mean": 0.1458802491426468,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01397980861365795,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.01397980861365795,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024859635904431344,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.031609703600406644,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003107454488053918,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003107454488053918,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012359362561255693,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001959600206464529,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.212325871369103e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.212325871369103e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14064022451639174,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18756941258907317,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14064022451639174,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18756941258907317,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14064022451639174,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18756941258907317,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14064022451639174,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18756941258907317,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09089281260967255,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12226969897747039,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016269813058897853,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016269813058897853,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14064022451639174,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18756941258907317,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002517459914088249,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010960309766232967,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01376073807477951,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001370038720779121,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001370038720779121,
|
|
"step": 160
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10147454873978934,
|
|
"calibration/batch_distribution_entropy": 0.8797335226096754,
|
|
"calibration/buffer_distribution_entropy": 0.946214670936134,
|
|
"calibration/confidence_entropy": 0.42908605137799893,
|
|
"calibration/coverage@0%": 0.190625,
|
|
"calibration/coverage@1%": 0.190625,
|
|
"calibration/coverage@10%": 0.615625,
|
|
"calibration/coverage@15%": 0.740625,
|
|
"calibration/coverage@20%": 0.8328125,
|
|
"calibration/coverage@25%": 0.8875,
|
|
"calibration/coverage@30%": 0.9671875,
|
|
"calibration/coverage@5%": 0.4078125,
|
|
"calibration/ece": 0.15184928203773357,
|
|
"calibration/mean_confidence": 0.6085124254121397,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 697.0,
|
|
"completions/max_terminated_length": 519.8,
|
|
"completions/mean_length": 186.444140625,
|
|
"completions/mean_terminated_length": 186.31230773925782,
|
|
"completions/min_length": 93.8,
|
|
"completions/min_terminated_length": 93.8,
|
|
"epoch": 0.528,
|
|
"grad_norm": 0.0019444272620603442,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 554064778.0,
|
|
"reward": 1.0289855241775512,
|
|
"reward_std": 0.07206702530384064,
|
|
"rewards/accuracy_reward": 0.58603515625,
|
|
"rewards/brier_reward": 0.8264921069145202,
|
|
"rewards/confidence_uniqueness_reward": 0.9401045680046082,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0016523070633411407,
|
|
"rewards/frontier_coverage_1": 0.13403759896755219,
|
|
"rewards/frontier_coverage_10": 0.13403759896755219,
|
|
"rewards/frontier_coverage_15": 0.13403759896755219,
|
|
"rewards/frontier_coverage_20": 0.13403759896755219,
|
|
"rewards/frontier_coverage_25": 0.09650920405983925,
|
|
"rewards/frontier_coverage_5": 0.13403759896755219,
|
|
"rewards/frontier_ece_reward": 0.011983232945203781,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.096856689453125,
|
|
"signal/accuracy_reward/group_std_mean": 0.13027185052633286,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0484283447265625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0484283447265625,
|
|
"signal/advantage_abs_mean": 0.05456642434000969,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05456642434000969,
|
|
"signal/advantage_pre_scale_std": 0.10015368908643722,
|
|
"signal/advantage_std": 0.10015368908643722,
|
|
"signal/brier_reward/centered_abs_mean": 0.11027712374925613,
|
|
"signal/brier_reward/group_std_mean": 0.14253330528736113,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013784640468657017,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013784640468657017,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025565633177757265,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03242117166519165,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003195704147219658,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003195704147219658,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011575968354009091,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001790312142111361,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0720982865896077e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0720982865896077e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.15162838697433473,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19694490134716033,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.15162838697433473,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19694490134716033,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.15162838697433473,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19694490134716033,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.15162838697433473,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19694490134716033,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09777042716741562,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.12786214500665666,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017500906018540264,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017500906018540264,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.15162838697433473,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19694490134716033,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027141480706632136,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00985901989042759,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012534209899604321,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012323774863034487,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012323774863034487,
|
|
"step": 165
|
|
},
|
|
{
|
|
"calibration/aurc": 0.17559047792121124,
|
|
"calibration/batch_distribution_entropy": 0.8843507468483722,
|
|
"calibration/buffer_distribution_entropy": 0.9468903060077588,
|
|
"calibration/confidence_entropy": 0.4229738822725511,
|
|
"calibration/coverage@0%": 0.1265655637254902,
|
|
"calibration/coverage@1%": 0.14765931372549018,
|
|
"calibration/coverage@10%": 0.48027267156862746,
|
|
"calibration/coverage@15%": 0.5803094362745098,
|
|
"calibration/coverage@20%": 0.6623805147058823,
|
|
"calibration/coverage@25%": 0.7264950980392156,
|
|
"calibration/coverage@30%": 0.789828431372549,
|
|
"calibration/coverage@5%": 0.2242218137254902,
|
|
"calibration/ece": 0.11771892982943855,
|
|
"calibration/mean_confidence": 0.6129658324860427,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 566.6,
|
|
"completions/max_terminated_length": 566.6,
|
|
"completions/mean_length": 188.92958984375,
|
|
"completions/mean_terminated_length": 188.92958984375,
|
|
"completions/min_length": 94.0,
|
|
"completions/min_terminated_length": 94.0,
|
|
"epoch": 0.544,
|
|
"grad_norm": 0.0022145204711705446,
|
|
"learning_rate": 1e-06,
|
|
"loss": -0.0,
|
|
"num_tokens": 571163001.0,
|
|
"reward": 1.0449440240859986,
|
|
"reward_std": 0.0750869557261467,
|
|
"rewards/accuracy_reward": 0.62900390625,
|
|
"rewards/brier_reward": 0.8198571324348449,
|
|
"rewards/confidence_uniqueness_reward": 0.9451698303222656,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.001585884322412312,
|
|
"rewards/frontier_coverage_1": 0.08185996562242508,
|
|
"rewards/frontier_coverage_10": 0.08185996562242508,
|
|
"rewards/frontier_coverage_15": 0.08185996562242508,
|
|
"rewards/frontier_coverage_20": 0.08185996562242508,
|
|
"rewards/frontier_coverage_25": 0.0604823037981987,
|
|
"rewards/frontier_coverage_5": 0.08185996562242508,
|
|
"rewards/frontier_ece_reward": 0.011854531429708003,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.102349853515625,
|
|
"signal/accuracy_reward/group_std_mean": 0.13563383221626282,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0511749267578125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0511749267578125,
|
|
"signal/advantage_abs_mean": 0.05685669779777527,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05685669779777527,
|
|
"signal/advantage_pre_scale_std": 0.10516398698091507,
|
|
"signal/advantage_std": 0.10516398698091507,
|
|
"signal/brier_reward/centered_abs_mean": 0.11613436192274093,
|
|
"signal/brier_reward/group_std_mean": 0.14911974966526031,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014516795240342616,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014516795240342616,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02417031079530716,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03060316704213619,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003021288849413395,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003021288849413395,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012515761191025376,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001968202483840287,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.240321155113634e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.240321155113634e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14715155959129333,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19198558628559112,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14715155959129333,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19198558628559112,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14715155959129333,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19198558628559112,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14715155959129333,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.19198558628559112,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.09022901803255082,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11834533214569092,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016150993760675192,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016150993760675192,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14715155959129333,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19198558628559112,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026340128388255835,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.010609462484717369,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.01322672814130783,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001326182810589671,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001326182810589671,
|
|
"step": 170
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22587778042468196,
|
|
"calibration/batch_distribution_entropy": 0.9286465435216072,
|
|
"calibration/buffer_distribution_entropy": 0.9467257565889398,
|
|
"calibration/confidence_entropy": 0.4432571774263129,
|
|
"calibration/coverage@0%": 0.015625,
|
|
"calibration/coverage@1%": 0.015625,
|
|
"calibration/coverage@10%": 0.42265625,
|
|
"calibration/coverage@15%": 0.47109375,
|
|
"calibration/coverage@20%": 0.49453125,
|
|
"calibration/coverage@25%": 0.65859375,
|
|
"calibration/coverage@30%": 0.71171875,
|
|
"calibration/coverage@5%": 0.26875,
|
|
"calibration/ece": 0.14173538803059738,
|
|
"calibration/mean_confidence": 0.5470402575486807,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 657.2,
|
|
"completions/max_terminated_length": 440.8,
|
|
"completions/mean_length": 186.93251953125,
|
|
"completions/mean_terminated_length": 186.66925354003905,
|
|
"completions/min_length": 89.4,
|
|
"completions/min_terminated_length": 89.4,
|
|
"epoch": 0.56,
|
|
"grad_norm": 0.0018579477909952402,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.001,
|
|
"num_tokens": 587898598.0,
|
|
"reward": 1.0260156869888306,
|
|
"reward_std": 0.0709751732647419,
|
|
"rewards/accuracy_reward": 0.57705078125,
|
|
"rewards/brier_reward": 0.8331053018569946,
|
|
"rewards/confidence_uniqueness_reward": 0.9434573888778687,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0018543781014159321,
|
|
"rewards/frontier_coverage_1": 0.13779235035181045,
|
|
"rewards/frontier_coverage_10": 0.13779235035181045,
|
|
"rewards/frontier_coverage_15": 0.13779235035181045,
|
|
"rewards/frontier_coverage_20": 0.13634179830551146,
|
|
"rewards/frontier_coverage_25": 0.09466939568519592,
|
|
"rewards/frontier_coverage_5": 0.13779235035181045,
|
|
"rewards/frontier_ece_reward": 0.012398156523704528,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.079791259765625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1126218855381012,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0398956298828125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0398956298828125,
|
|
"signal/advantage_abs_mean": 0.05201718434691429,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05201718434691429,
|
|
"signal/advantage_pre_scale_std": 0.09894705563783646,
|
|
"signal/advantage_std": 0.09894705563783646,
|
|
"signal/brier_reward/centered_abs_mean": 0.1116187259554863,
|
|
"signal/brier_reward/group_std_mean": 0.14699228405952453,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013952340744435788,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013952340744435788,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02636619359254837,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.033408934623003005,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032957741990685464,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032957741990685464,
|
|
"signal/format_reward/centered_abs_mean": 0.0003662109375,
|
|
"signal/format_reward/group_std_mean": 0.000768545875325799,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.00018310546875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014577839057892561,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023339309729635714,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6094331042259e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6094331042259e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13344906717538835,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17753869891166688,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002388738188892603,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002388738188892603,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13344906717538835,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17753869891166688,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002388738188892603,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002388738188892603,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13344906717538835,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17753869891166688,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002388738188892603,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002388738188892603,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1315935179591179,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1751266449689865,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002355523919686675,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002355523919686675,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08300138115882874,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11093302965164184,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00148572470061481,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00148572470061481,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13344906717538835,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17753869891166688,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002388738188892603,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002388738188892603,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.009788069687783719,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.012408962100744247,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012235087109729649,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012235087109729649,
|
|
"step": 175
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2964656946460727,
|
|
"calibration/batch_distribution_entropy": 0.9168176730321715,
|
|
"calibration/buffer_distribution_entropy": 0.9476380754401748,
|
|
"calibration/confidence_entropy": 0.4257376662970197,
|
|
"calibration/coverage@0%": 0.11015625,
|
|
"calibration/coverage@1%": 0.1140625,
|
|
"calibration/coverage@10%": 0.23984375,
|
|
"calibration/coverage@15%": 0.32734375,
|
|
"calibration/coverage@20%": 0.3671875,
|
|
"calibration/coverage@25%": 0.48046875,
|
|
"calibration/coverage@30%": 0.578125,
|
|
"calibration/coverage@5%": 0.165625,
|
|
"calibration/ece": 0.1658030474792637,
|
|
"calibration/mean_confidence": 0.5411404430872265,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00048828125,
|
|
"completions/max_length": 1305.2,
|
|
"completions/max_terminated_length": 468.2,
|
|
"completions/mean_length": 187.0533203125,
|
|
"completions/mean_terminated_length": 186.39432373046876,
|
|
"completions/min_length": 68.0,
|
|
"completions/min_terminated_length": 68.0,
|
|
"epoch": 0.576,
|
|
"grad_norm": 0.3020451068878174,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0013,
|
|
"num_tokens": 605000648.0,
|
|
"reward": 1.030449903011322,
|
|
"reward_std": 0.06246692091226578,
|
|
"rewards/accuracy_reward": 0.59111328125,
|
|
"rewards/brier_reward": 0.8259658694267273,
|
|
"rewards/confidence_uniqueness_reward": 0.9397091507911682,
|
|
"rewards/format_reward": 0.99931640625,
|
|
"rewards/frontier_aurc_reward": -0.0018171647796407342,
|
|
"rewards/frontier_coverage_1": 0.1292146548628807,
|
|
"rewards/frontier_coverage_10": 0.1292146548628807,
|
|
"rewards/frontier_coverage_15": 0.1292146548628807,
|
|
"rewards/frontier_coverage_20": 0.12564596012234688,
|
|
"rewards/frontier_coverage_25": 0.08896546289324761,
|
|
"rewards/frontier_coverage_5": 0.1292146548628807,
|
|
"rewards/frontier_ece_reward": 0.011719273403286934,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.066680908203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.09644376039505005,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0333404541015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0333404541015625,
|
|
"signal/advantage_abs_mean": 0.04487398453056812,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04487398453056812,
|
|
"signal/advantage_pre_scale_std": 0.08947417140007019,
|
|
"signal/advantage_std": 0.08947417140007019,
|
|
"signal/brier_reward/centered_abs_mean": 0.10441422760486603,
|
|
"signal/brier_reward/group_std_mean": 0.13690564334392546,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013051778450608254,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013051778450608254,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028500469401478767,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03677135743200779,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003562558675184846,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003562558675184846,
|
|
"signal/format_reward/centered_abs_mean": 0.001300048828125,
|
|
"signal/format_reward/group_std_mean": 0.0031943732406944036,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012849176069721579,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.001977930567227304,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3000024521024896e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3000024521024896e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13109023869037628,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1723720222711563,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023465151432901623,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023465151432901623,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13109023869037628,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1723720222711563,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023465151432901623,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023465151432901623,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13109023869037628,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1723720222711563,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023465151432901623,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023465151432901623,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1257859319448471,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.16545325815677642,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022515680640935896,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022515680640935896,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08115749582648277,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10648612678050995,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014527191407978535,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014527191407978535,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13109023869037628,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1723720222711563,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023465151432901623,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023465151432901623,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00884333048015833,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011235564388334751,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011054163100197912,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011054163100197912,
|
|
"step": 180
|
|
},
|
|
{
|
|
"calibration/aurc": 0.3515624446654767,
|
|
"calibration/batch_distribution_entropy": 0.8888446214443768,
|
|
"calibration/buffer_distribution_entropy": 0.9482682908968807,
|
|
"calibration/confidence_entropy": 0.4005931618908181,
|
|
"calibration/coverage@0%": 0.07814644607843138,
|
|
"calibration/coverage@1%": 0.07814644607843138,
|
|
"calibration/coverage@10%": 0.252469362745098,
|
|
"calibration/coverage@15%": 0.3150183823529412,
|
|
"calibration/coverage@20%": 0.3674172794117647,
|
|
"calibration/coverage@25%": 0.41276654411764707,
|
|
"calibration/coverage@30%": 0.5190686274509804,
|
|
"calibration/coverage@5%": 0.15006740196078433,
|
|
"calibration/ece": 0.12824287845261145,
|
|
"calibration/mean_confidence": 0.5009771199979778,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 1110.8,
|
|
"completions/max_terminated_length": 451.4,
|
|
"completions/mean_length": 186.34189453125,
|
|
"completions/mean_terminated_length": 185.94619750976562,
|
|
"completions/min_length": 85.4,
|
|
"completions/min_terminated_length": 85.4,
|
|
"epoch": 0.592,
|
|
"grad_norm": 0.0023227103520184755,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0009,
|
|
"num_tokens": 622076501.0,
|
|
"reward": 1.0253287315368653,
|
|
"reward_std": 0.07493966221809387,
|
|
"rewards/accuracy_reward": 0.584375,
|
|
"rewards/brier_reward": 0.8193248152732849,
|
|
"rewards/confidence_uniqueness_reward": 0.9278930783271789,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0017773719038814307,
|
|
"rewards/frontier_coverage_1": 0.1330685704946518,
|
|
"rewards/frontier_coverage_10": 0.1330685704946518,
|
|
"rewards/frontier_coverage_15": 0.1330685704946518,
|
|
"rewards/frontier_coverage_20": 0.1282924994826317,
|
|
"rewards/frontier_coverage_25": 0.0903812974691391,
|
|
"rewards/frontier_coverage_5": 0.1330685704946518,
|
|
"rewards/frontier_ece_reward": 0.011802474223077297,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.09986572265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.1311745300889015,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049932861328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049932861328125,
|
|
"signal/advantage_abs_mean": 0.05721670612692833,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05721670612692833,
|
|
"signal/advantage_pre_scale_std": 0.10559385418891906,
|
|
"signal/advantage_std": 0.10559385418891906,
|
|
"signal/brier_reward/centered_abs_mean": 0.11419818848371506,
|
|
"signal/brier_reward/group_std_mean": 0.14777041971683502,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014274773560464383,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.014274773560464383,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03568760454654694,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.04578934088349342,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004460950568318367,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004460950568318367,
|
|
"signal/format_reward/centered_abs_mean": 0.000567626953125,
|
|
"signal/format_reward/group_std_mean": 0.0016572814900428056,
|
|
"signal/format_reward/group_zero_std_frac": 0.990625,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014281244948506355,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022319577634334563,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.55634276982164e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.55634276982164e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14765068590641023,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1923435479402542,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026429472491145134,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026429472491145134,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14765068590641023,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1923435479402542,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026429472491145134,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026429472491145134,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14765068590641023,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1923435479402542,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026429472491145134,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026429472491145134,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.13963269293308259,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18203844726085663,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002499425271525979,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002499425271525979,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08843920975923539,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11563192903995514,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015830618096515537,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015830618096515537,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14765068590641023,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1923435479402542,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026429472491145134,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026429472491145134,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008998825587332249,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.011416062340140342,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011248531984165311,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011248531984165311,
|
|
"step": 185
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19760200424062405,
|
|
"calibration/batch_distribution_entropy": 0.843288138266779,
|
|
"calibration/buffer_distribution_entropy": 0.9480241421807527,
|
|
"calibration/confidence_entropy": 0.3636719512707667,
|
|
"calibration/coverage@0%": 0.2765625,
|
|
"calibration/coverage@1%": 0.2984375,
|
|
"calibration/coverage@10%": 0.5328125,
|
|
"calibration/coverage@15%": 0.6515625,
|
|
"calibration/coverage@20%": 0.70703125,
|
|
"calibration/coverage@25%": 0.75,
|
|
"calibration/coverage@30%": 0.78125,
|
|
"calibration/coverage@5%": 0.44375,
|
|
"calibration/ece": 0.16141309983740632,
|
|
"calibration/mean_confidence": 0.5030459626625937,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 570.6,
|
|
"completions/max_terminated_length": 570.6,
|
|
"completions/mean_length": 187.1,
|
|
"completions/mean_terminated_length": 187.1,
|
|
"completions/min_length": 89.4,
|
|
"completions/min_terminated_length": 89.4,
|
|
"epoch": 0.608,
|
|
"grad_norm": 0.0017206113552674651,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 638991893.0,
|
|
"reward": 1.0279302835464477,
|
|
"reward_std": 0.05821175277233124,
|
|
"rewards/accuracy_reward": 0.5734375,
|
|
"rewards/brier_reward": 0.8467367172241211,
|
|
"rewards/confidence_uniqueness_reward": 0.9279510498046875,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.001405553543008864,
|
|
"rewards/frontier_coverage_1": 0.17639144659042358,
|
|
"rewards/frontier_coverage_10": 0.17639144659042358,
|
|
"rewards/frontier_coverage_15": 0.17639144659042358,
|
|
"rewards/frontier_coverage_20": 0.17281466871500015,
|
|
"rewards/frontier_coverage_25": 0.11612609624862671,
|
|
"rewards/frontier_coverage_5": 0.17639144659042358,
|
|
"rewards/frontier_ece_reward": 0.012792413122951984,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.081982421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10979770123958588,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409912109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0409912109375,
|
|
"signal/advantage_abs_mean": 0.043833667784929274,
|
|
"signal/advantage_pre_scale_abs_mean": 0.043833667784929274,
|
|
"signal/advantage_pre_scale_std": 0.08639424741268158,
|
|
"signal/advantage_std": 0.08639424741268158,
|
|
"signal/brier_reward/centered_abs_mean": 0.10321188867092132,
|
|
"signal/brier_reward/group_std_mean": 0.1343301758170128,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012901486083865165,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012901486083865165,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03328895568847656,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0417523019015789,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00416111946105957,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00416111946105957,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0010029817116446794,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0015215349150821567,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7953372116608078e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7953372116608078e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14954022765159608,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19398094117641448,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002676770044490695,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002676770044490695,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14954022765159608,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19398094117641448,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002676770044490695,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002676770044490695,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14954022765159608,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19398094117641448,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002676770044490695,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002676770044490695,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.14172202944755555,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.18404050469398497,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002536824205890298,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002536824205890298,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08968007564544678,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.11650702059268951,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016052733408287168,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016052733408287168,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14954022765159608,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19398094117641448,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002676770044490695,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002676770044490695,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.008176222257316113,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010366989858448505,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010220277821645142,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010220277821645142,
|
|
"step": 190
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16803935733946962,
|
|
"calibration/batch_distribution_entropy": 0.8950542863389088,
|
|
"calibration/buffer_distribution_entropy": 0.9478421140046102,
|
|
"calibration/confidence_entropy": 0.4051885790306386,
|
|
"calibration/coverage@0%": 0.16640625,
|
|
"calibration/coverage@1%": 0.17109375,
|
|
"calibration/coverage@10%": 0.540625,
|
|
"calibration/coverage@15%": 0.609375,
|
|
"calibration/coverage@20%": 0.67265625,
|
|
"calibration/coverage@25%": 0.71484375,
|
|
"calibration/coverage@30%": 0.75390625,
|
|
"calibration/coverage@5%": 0.41953125,
|
|
"calibration/ece": 0.11327187991929444,
|
|
"calibration/mean_confidence": 0.5317124950807055,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 713.6,
|
|
"completions/max_terminated_length": 501.8,
|
|
"completions/mean_length": 188.14892578125,
|
|
"completions/mean_terminated_length": 188.01675415039062,
|
|
"completions/min_length": 91.0,
|
|
"completions/min_terminated_length": 91.0,
|
|
"epoch": 0.624,
|
|
"grad_norm": 0.0019219900714233518,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 656262442.0,
|
|
"reward": 1.0314565539360045,
|
|
"reward_std": 0.06776490807533264,
|
|
"rewards/accuracy_reward": 0.58193359375,
|
|
"rewards/brier_reward": 0.8456698775291442,
|
|
"rewards/confidence_uniqueness_reward": 0.93500657081604,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0015101159922778606,
|
|
"rewards/frontier_coverage_1": 0.16571835279464722,
|
|
"rewards/frontier_coverage_10": 0.16571835279464722,
|
|
"rewards/frontier_coverage_15": 0.16571835279464722,
|
|
"rewards/frontier_coverage_20": 0.15580750107765198,
|
|
"rewards/frontier_coverage_25": 0.10798413306474686,
|
|
"rewards/frontier_coverage_5": 0.16571835279464722,
|
|
"rewards/frontier_ece_reward": 0.011540688015520573,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.094268798828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.12409499287605286,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0471343994140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0471343994140625,
|
|
"signal/advantage_abs_mean": 0.05176782011985779,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05176782011985779,
|
|
"signal/advantage_pre_scale_std": 0.09750174582004548,
|
|
"signal/advantage_std": 0.09750174582004548,
|
|
"signal/brier_reward/centered_abs_mean": 0.10846467316150665,
|
|
"signal/brier_reward/group_std_mean": 0.14180308282375337,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013558084145188332,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013558084145188332,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030897776782512664,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03884159214794636,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003862222097814083,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003862222097814083,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011383457691408693,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0017884798115119338,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.03763887839159e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.03763887839159e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14944371283054353,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19577408730983734,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002675042301416397,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002675042301416397,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14944371283054353,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.19577408730983734,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002675042301416397,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002675042301416397,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.14944371283054353,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.19577408730983734,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002675042301416397,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002675042301416397,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.1349347472190857,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.17690467536449433,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002415331965312362,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002415331965312362,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.08353340923786164,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.10984267294406891,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014952480327337981,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014952480327337981,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14944371283054353,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19577408730983734,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002675042301416397,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002675042301416397,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007968425843864679,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.010179330036044121,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009960532304830849,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009960532304830849,
|
|
"step": 195
|
|
},
|
|
{
|
|
"calibration/aurc": 0.20889160874971036,
|
|
"calibration/batch_distribution_entropy": 0.9089865199203135,
|
|
"calibration/buffer_distribution_entropy": 0.9479906632560786,
|
|
"calibration/confidence_entropy": 0.4287082953337954,
|
|
"calibration/coverage@0%": 0.09921875,
|
|
"calibration/coverage@1%": 0.11796875,
|
|
"calibration/coverage@10%": 0.40703125,
|
|
"calibration/coverage@15%": 0.46484375,
|
|
"calibration/coverage@20%": 0.63671875,
|
|
"calibration/coverage@25%": 0.6953125,
|
|
"calibration/coverage@30%": 0.75078125,
|
|
"calibration/coverage@5%": 0.3515625,
|
|
"calibration/ece": 0.1674316742601562,
|
|
"calibration/mean_confidence": 0.5789995757398437,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 869.4,
|
|
"completions/max_terminated_length": 445.4,
|
|
"completions/mean_length": 196.52490234375,
|
|
"completions/mean_terminated_length": 196.00135192871093,
|
|
"completions/min_length": 96.2,
|
|
"completions/min_terminated_length": 96.2,
|
|
"epoch": 0.64,
|
|
"grad_norm": 0.0018030045321211219,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0012,
|
|
"num_tokens": 673617545.0,
|
|
"reward": 1.0461254596710206,
|
|
"reward_std": 0.057253798097372056,
|
|
"rewards/accuracy_reward": 0.6255859375,
|
|
"rewards/brier_reward": 0.8291385531425476,
|
|
"rewards/confidence_uniqueness_reward": 0.9392925262451172,
|
|
"rewards/format_reward": 0.999609375,
|
|
"rewards/frontier_aurc_reward": -0.001513039879500866,
|
|
"rewards/frontier_coverage_1": 0.11257308274507523,
|
|
"rewards/frontier_coverage_10": 0.11257308274507523,
|
|
"rewards/frontier_coverage_15": 0.11257308274507523,
|
|
"rewards/frontier_coverage_20": 0.10006719529628753,
|
|
"rewards/frontier_coverage_25": 0.07489581555128097,
|
|
"rewards/frontier_coverage_5": 0.11257308274507523,
|
|
"rewards/frontier_ece_reward": 0.010471446067094803,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.06611328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.09317785650491714,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.033056640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.033056640625,
|
|
"signal/advantage_abs_mean": 0.04163134917616844,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04163134917616844,
|
|
"signal/advantage_pre_scale_std": 0.0858407735824585,
|
|
"signal/advantage_std": 0.0858407735824585,
|
|
"signal/brier_reward/centered_abs_mean": 0.09937669783830642,
|
|
"signal/brier_reward/group_std_mean": 0.13064824044704437,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012422087229788303,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012422087229788303,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02863082177937031,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03645128607749939,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035788527224212886,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035788527224212886,
|
|
"signal/format_reward/centered_abs_mean": 0.00072021484375,
|
|
"signal/format_reward/group_std_mean": 0.0014778789598494768,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000360107421875,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000360107421875,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011738982051610948,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0018677733605727553,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1012776051065886e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1012776051065886e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12463102638721466,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16538253724575042,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022308954037725927,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022308954037725927,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12463102638721466,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16538253724575042,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022308954037725927,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022308954037725927,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.12463102638721466,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.16538253724575042,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022308954037725927,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022308954037725927,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.10897718667984009,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.1449252337217331,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019506915938109159,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019506915938109159,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06840595453977585,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0906538799405098,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001224466529674828,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001224466529674828,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12463102638721466,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16538253724575042,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022308954037725927,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022308954037725927,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.007416488416492939,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.009534438140690327,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009270610520616174,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009270610520616174,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"eval_calibration/aurc": 0.3420038089344068,
|
|
"eval_calibration/batch_distribution_entropy": 0.9040885318071357,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9499923798968546,
|
|
"eval_calibration/confidence_entropy": 0.43239845984155695,
|
|
"eval_calibration/coverage@0%": 0.125,
|
|
"eval_calibration/coverage@1%": 0.125,
|
|
"eval_calibration/coverage@10%": 0.203125,
|
|
"eval_calibration/coverage@15%": 0.203125,
|
|
"eval_calibration/coverage@20%": 0.28125,
|
|
"eval_calibration/coverage@25%": 0.359375,
|
|
"eval_calibration/coverage@30%": 0.390625,
|
|
"eval_calibration/coverage@5%": 0.125,
|
|
"eval_calibration/ece": 0.13722656249999998,
|
|
"eval_calibration/mean_confidence": 0.4819140625,
|
|
"eval_completions/clipped_ratio": 0.001953125,
|
|
"eval_completions/max_length": 964.5,
|
|
"eval_completions/max_terminated_length": 382.0,
|
|
"eval_completions/mean_length": 200.27852630615234,
|
|
"eval_completions/mean_terminated_length": 197.66400146484375,
|
|
"eval_completions/min_length": 107.0,
|
|
"eval_completions/min_terminated_length": 107.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 673617545.0,
|
|
"eval_reward": 0.9530201256275177,
|
|
"eval_reward_std": 0.23500938713550568,
|
|
"eval_rewards/accuracy_reward": 0.439453125,
|
|
"eval_rewards/brier_reward": 0.8035316169261932,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.889798104763031,
|
|
"eval_rewards/format_reward": 0.998046875,
|
|
"eval_rewards/frontier_aurc_reward": -0.002739873481914401,
|
|
"eval_rewards/frontier_coverage_1": 0.22667521983385086,
|
|
"eval_rewards/frontier_coverage_10": 0.22667521983385086,
|
|
"eval_rewards/frontier_coverage_15": 0.22667521983385086,
|
|
"eval_rewards/frontier_coverage_20": 0.1793447956442833,
|
|
"eval_rewards/frontier_coverage_25": 0.11752147227525711,
|
|
"eval_rewards/frontier_coverage_5": 0.22667521983385086,
|
|
"eval_rewards/frontier_ece_reward": 0.008872916921973228,
|
|
"eval_runtime": 19.7075,
|
|
"eval_samples_per_second": 25.371,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4754638671875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49497611820697784,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23773193359375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23773193359375,
|
|
"eval_signal/advantage_abs_mean": 0.21505261212587357,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.21505261212587357,
|
|
"eval_signal/advantage_pre_scale_std": 0.2322075515985489,
|
|
"eval_signal/advantage_std": 0.2322075515985489,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.22212185710668564,
|
|
"eval_signal/brier_reward/group_std_mean": 0.27625299990177155,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027765232138335705,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027765232138335705,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04621247202157974,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.059811294078826904,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005776559002697468,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005776559002697468,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
|
|
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
|
|
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003157320083118975,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005664329044520855,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6516028053010814e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6516028053010814e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.382648304104805,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.46173766255378723,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0068494039587676525,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0068494039587676525,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.382648304104805,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.46173766255378723,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0068494039587676525,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0068494039587676525,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.382648304104805,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.46173766255378723,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0068494039587676525,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0068494039587676525,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2975280433893204,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.36239591240882874,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005325751379132271,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005325751379132271,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1779879480600357,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.22136619687080383,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031859842129051685,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031859842129051685,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.382648304104805,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.46173766255378723,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0068494039587676525,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0068494039587676525,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013139450456947088,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.015696686692535877,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001642431307118386,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001642431307118386,
|
|
"eval_steps_per_second": 0.101,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"step": 200,
|
|
"train_probe_calibration/aurc": 0.14380049777966852,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.8924841198751866,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.9501962946765351,
|
|
"train_probe_calibration/confidence_entropy": 0.4064653950766257,
|
|
"train_probe_calibration/coverage@0%": 0.0625,
|
|
"train_probe_calibration/coverage@1%": 0.0625,
|
|
"train_probe_calibration/coverage@10%": 0.6875,
|
|
"train_probe_calibration/coverage@15%": 0.796875,
|
|
"train_probe_calibration/coverage@20%": 0.875,
|
|
"train_probe_calibration/coverage@25%": 0.921875,
|
|
"train_probe_calibration/coverage@30%": 0.96875,
|
|
"train_probe_calibration/coverage@5%": 0.0625,
|
|
"train_probe_calibration/ece": 0.21375468749999998,
|
|
"train_probe_calibration/mean_confidence": 0.5793390625,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 316.5,
|
|
"train_probe_completions/max_terminated_length": 316.5,
|
|
"train_probe_completions/mean_length": 195.48914337158203,
|
|
"train_probe_completions/mean_terminated_length": 195.48914337158203,
|
|
"train_probe_completions/min_length": 101.0,
|
|
"train_probe_completions/min_terminated_length": 101.0,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 673617545.0,
|
|
"train_probe_reward": 1.0627794861793518,
|
|
"train_probe_reward_std": 0.2138308882713318,
|
|
"train_probe_rewards/accuracy_reward": 0.66796875,
|
|
"train_probe_rewards/brier_reward": 0.8411527872085571,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.90185546875,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.0011366689577698708,
|
|
"train_probe_rewards/frontier_coverage_1": 0.09746142104268074,
|
|
"train_probe_rewards/frontier_coverage_10": 0.09746142104268074,
|
|
"train_probe_rewards/frontier_coverage_15": 0.09746142104268074,
|
|
"train_probe_rewards/frontier_coverage_20": 0.08396613597869873,
|
|
"train_probe_rewards/frontier_coverage_25": 0.06902317516505718,
|
|
"train_probe_rewards/frontier_coverage_5": 0.09746142104268074,
|
|
"train_probe_rewards/frontier_ece_reward": 0.009781356435269117,
|
|
"train_probe_runtime": 9.0902,
|
|
"train_probe_samples_per_second": 55.004,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.434326171875,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.4729345738887787,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2171630859375,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2171630859375,
|
|
"train_probe_signal/advantage_abs_mean": 0.19102784246206284,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.19102784246206284,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.21111667901277542,
|
|
"train_probe_signal/advantage_std": 0.21111667901277542,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.1861182525753975,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.24657447636127472,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023264781571924686,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.023264781571924686,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.039306640625,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.046158455312252045,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004913330078125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004913330078125,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0019272951176390052,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0035583705175668,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.449858195381239e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.449858195381239e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3332698345184326,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.45197173953056335,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005965529475361109,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005965529475361109,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3332698345184326,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.45197173953056335,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005965529475361109,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005965529475361109,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3332698345184326,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.45197173953056335,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005965529475361109,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005965529475361109,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.256347618997097,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.35391244292259216,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0045886223670095205,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045886223670095205,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.1468118354678154,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.20914901793003082,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026279317680746317,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026279317680746317,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3332698345184326,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.45197173953056335,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005965529475361109,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005965529475361109,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.012552765663713217,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.014878344256430864,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001569095707964152,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001569095707964152,
|
|
"train_probe_steps_per_second": 0.22
|
|
},
|
|
{
|
|
"calibration/aurc": 0.24817290495089597,
|
|
"calibration/batch_distribution_entropy": 0.9204595927573562,
|
|
"calibration/buffer_distribution_entropy": 0.9519832870818661,
|
|
"calibration/confidence_entropy": 0.4577226375814408,
|
|
"calibration/coverage@0%": 0.025,
|
|
"calibration/coverage@1%": 0.1078125,
|
|
"calibration/coverage@10%": 0.3125,
|
|
"calibration/coverage@15%": 0.35234375,
|
|
"calibration/coverage@20%": 0.4484375,
|
|
"calibration/coverage@25%": 0.49609375,
|
|
"calibration/coverage@30%": 0.6015625,
|
|
"calibration/coverage@5%": 0.240625,
|
|
"calibration/ece": 0.1720056808846374,
|
|
"calibration/mean_confidence": 0.5100622878653626,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 656.8,
|
|
"completions/max_terminated_length": 496.6,
|
|
"completions/mean_length": 199.54921875,
|
|
"completions/mean_terminated_length": 199.4187744140625,
|
|
"completions/min_length": 99.0,
|
|
"completions/min_terminated_length": 99.0,
|
|
"epoch": 0.656,
|
|
"grad_norm": 0.0016656734514981508,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 690517473.0,
|
|
"reward": 1.0183162331581115,
|
|
"reward_std": 0.06802579239010811,
|
|
"rewards/accuracy_reward": 0.57275390625,
|
|
"rewards/brier_reward": 0.8095171332359314,
|
|
"rewards/confidence_uniqueness_reward": 0.94515380859375,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0018861858639866113,
|
|
"rewards/frontier_coverage_1": 0.12252766788005828,
|
|
"rewards/frontier_coverage_10": 0.12252766788005828,
|
|
"rewards/frontier_coverage_15": 0.12252766788005828,
|
|
"rewards/frontier_coverage_20": 0.09871871173381805,
|
|
"rewards/frontier_coverage_25": 0.0694797769188881,
|
|
"rewards/frontier_coverage_5": 0.12252766788005828,
|
|
"rewards/frontier_ece_reward": 0.007234203815460205,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.083831787109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11424745023250579,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0419158935546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0419158935546875,
|
|
"signal/advantage_abs_mean": 0.05140817314386368,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05140817314386368,
|
|
"signal/advantage_pre_scale_std": 0.09806035608053207,
|
|
"signal/advantage_std": 0.09806035608053207,
|
|
"signal/brier_reward/centered_abs_mean": 0.1077189490199089,
|
|
"signal/brier_reward/group_std_mean": 0.13838136196136475,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013464868627488613,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013464868627488613,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421807125210762,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030718856677412986,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030272589065134525,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030272589065134525,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013030647998675704,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0020229590591043234,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3324858921114357e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3324858921114357e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13521387726068496,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17738903760910035,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002420328464359045,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002420328464359045,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13521387726068496,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17738903760910035,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002420328464359045,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002420328464359045,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13521387726068496,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.17738903760910035,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002420328464359045,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002420328464359045,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.09841903001070022,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.129475200176239,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017617005854845048,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017617005854845048,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06278965771198272,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08188406601548195,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011239348677918315,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011239348677918315,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13521387726068496,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17738903760910035,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002420328464359045,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002420328464359045,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.006564310565590858,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.008403288014233113,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008205388206988573,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008205388206988573,
|
|
"step": 205
|
|
},
|
|
{
|
|
"calibration/aurc": 0.23136369662906525,
|
|
"calibration/batch_distribution_entropy": 0.8958516870167121,
|
|
"calibration/buffer_distribution_entropy": 0.9554491916207283,
|
|
"calibration/confidence_entropy": 0.43047974707442044,
|
|
"calibration/coverage@0%": 0.07734375,
|
|
"calibration/coverage@1%": 0.07734375,
|
|
"calibration/coverage@10%": 0.31953125,
|
|
"calibration/coverage@15%": 0.3921875,
|
|
"calibration/coverage@20%": 0.44375,
|
|
"calibration/coverage@25%": 0.5265625,
|
|
"calibration/coverage@30%": 0.60625,
|
|
"calibration/coverage@5%": 0.23203125,
|
|
"calibration/ece": 0.1621687781762295,
|
|
"calibration/mean_confidence": 0.5560812218237705,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 436.2,
|
|
"completions/max_terminated_length": 436.2,
|
|
"completions/mean_length": 204.09091796875,
|
|
"completions/mean_terminated_length": 204.09091796875,
|
|
"completions/min_length": 102.6,
|
|
"completions/min_terminated_length": 102.6,
|
|
"epoch": 0.672,
|
|
"grad_norm": 0.0015550514217466116,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 707520804.0,
|
|
"reward": 1.026635193824768,
|
|
"reward_std": 0.06110656931996346,
|
|
"rewards/accuracy_reward": 0.5787109375,
|
|
"rewards/brier_reward": 0.8387270212173462,
|
|
"rewards/confidence_uniqueness_reward": 0.9363265991210937,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.00141967604868114,
|
|
"rewards/frontier_coverage_1": 0.15640246868133545,
|
|
"rewards/frontier_coverage_10": 0.15640246868133545,
|
|
"rewards/frontier_coverage_15": 0.1499548703432083,
|
|
"rewards/frontier_coverage_20": 0.10669813752174377,
|
|
"rewards/frontier_coverage_25": 0.08252269625663758,
|
|
"rewards/frontier_coverage_5": 0.15640246868133545,
|
|
"rewards/frontier_ece_reward": 0.0076270273886621,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0878173828125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1166835829615593,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04390869140625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04390869140625,
|
|
"signal/advantage_abs_mean": 0.04624823108315468,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04624823108315468,
|
|
"signal/advantage_pre_scale_std": 0.09073985815048217,
|
|
"signal/advantage_std": 0.09073985815048217,
|
|
"signal/brier_reward/centered_abs_mean": 0.1011570304632187,
|
|
"signal/brier_reward/group_std_mean": 0.13063574135303496,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012644628807902337,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012644628807902337,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028185939788818358,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03486784622073173,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035232424736022947,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035232424736022947,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.00101193260634318,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0015801386674866081,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8113592523150147e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8113592523150147e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14319152235984803,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.18724198639392853,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025631281081587077,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025631281081587077,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.14319152235984803,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.18724198639392853,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025631281081587077,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025631281081587077,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.13852950036525727,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.18155628740787505,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024796778801828624,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024796778801828624,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.08701228499412536,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.11472053080797195,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00155751989223063,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00155751989223063,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05806139260530472,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07511216998100281,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010392988799139858,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010392988799139858,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14319152235984803,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.18724198639392853,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025631281081587077,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025631281081587077,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.005760752130299807,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.007287882454693318,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007200940162874758,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007200940162874758,
|
|
"step": 210
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2169351861265752,
|
|
"calibration/batch_distribution_entropy": 0.906085427880875,
|
|
"calibration/buffer_distribution_entropy": 0.95874229714237,
|
|
"calibration/confidence_entropy": 0.4680126493425715,
|
|
"calibration/coverage@0%": 0.09546875,
|
|
"calibration/coverage@1%": 0.09546875,
|
|
"calibration/coverage@10%": 0.4090379901960784,
|
|
"calibration/coverage@15%": 0.5012714460784313,
|
|
"calibration/coverage@20%": 0.5692738970588235,
|
|
"calibration/coverage@25%": 0.6333884803921569,
|
|
"calibration/coverage@30%": 0.7037837009803922,
|
|
"calibration/coverage@5%": 0.2745772058823529,
|
|
"calibration/ece": 0.1512060885389182,
|
|
"calibration/mean_confidence": 0.5770298367061799,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 668.2,
|
|
"completions/max_terminated_length": 486.8,
|
|
"completions/mean_length": 212.39873046875,
|
|
"completions/mean_terminated_length": 212.26973571777344,
|
|
"completions/min_length": 104.2,
|
|
"completions/min_terminated_length": 104.2,
|
|
"epoch": 0.688,
|
|
"grad_norm": 0.001888699596747756,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 724649687.0,
|
|
"reward": 1.0408958196640015,
|
|
"reward_std": 0.06385916396975518,
|
|
"rewards/accuracy_reward": 0.61318359375,
|
|
"rewards/brier_reward": 0.8376299142837524,
|
|
"rewards/confidence_uniqueness_reward": 0.9420908451080322,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.001473946124315262,
|
|
"rewards/frontier_coverage_1": 0.12142772302031517,
|
|
"rewards/frontier_coverage_10": 0.12142772302031517,
|
|
"rewards/frontier_coverage_15": 0.10840724855661392,
|
|
"rewards/frontier_coverage_20": 0.07632499039173127,
|
|
"rewards/frontier_coverage_25": 0.0743546724319458,
|
|
"rewards/frontier_coverage_5": 0.12142772302031517,
|
|
"rewards/frontier_ece_reward": 0.0060465382412076,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.085443115234375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11665472537279128,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0427215576171875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0427215576171875,
|
|
"signal/advantage_abs_mean": 0.04668809846043587,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04668809846043587,
|
|
"signal/advantage_pre_scale_std": 0.09376905411481858,
|
|
"signal/advantage_std": 0.09376905411481858,
|
|
"signal/brier_reward/centered_abs_mean": 0.09790285527706147,
|
|
"signal/brier_reward/group_std_mean": 0.1284557342529297,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012237856909632683,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012237856909632683,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02564612701535225,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03227175809442997,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003205765876919031,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003205765876919031,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011468618642538786,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0019072068389505148,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0528827008092776e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0528827008092776e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13615999221801758,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1743500828742981,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024372637271881104,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024372637271881104,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13615999221801758,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1743500828742981,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024372637271881104,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024372637271881104,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.11814617216587067,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.15128694474697113,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002114816382527351,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002114816382527351,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.07219749391078949,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.09231588244438171,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012923351023346186,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012923351023346186,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05291619151830673,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.06708001494407653,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009471998200751841,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009471998200751841,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13615999221801758,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1743500828742981,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024372637271881104,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024372637271881104,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004961969796568155,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.00630278754979372,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006202462245710194,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006202462245710194,
|
|
"step": 215
|
|
},
|
|
{
|
|
"calibration/aurc": 0.11948550052681228,
|
|
"calibration/batch_distribution_entropy": 0.7879994954219934,
|
|
"calibration/buffer_distribution_entropy": 0.9586850802270618,
|
|
"calibration/confidence_entropy": 0.3649859275848222,
|
|
"calibration/coverage@0%": 0.265625,
|
|
"calibration/coverage@1%": 0.26796875,
|
|
"calibration/coverage@10%": 0.496875,
|
|
"calibration/coverage@15%": 0.634375,
|
|
"calibration/coverage@20%": 0.78203125,
|
|
"calibration/coverage@25%": 0.83515625,
|
|
"calibration/coverage@30%": 0.8984375,
|
|
"calibration/coverage@5%": 0.3890625,
|
|
"calibration/ece": 0.13410406490920107,
|
|
"calibration/mean_confidence": 0.6672344767574656,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 652.2,
|
|
"completions/max_terminated_length": 434.8,
|
|
"completions/mean_length": 211.9447265625,
|
|
"completions/mean_terminated_length": 211.81568603515626,
|
|
"completions/min_length": 103.6,
|
|
"completions/min_terminated_length": 103.6,
|
|
"epoch": 0.704,
|
|
"grad_norm": 0.0016022155759856105,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 741686145.0,
|
|
"reward": 1.0412675619125367,
|
|
"reward_std": 0.060601814091205596,
|
|
"rewards/accuracy_reward": 0.61259765625,
|
|
"rewards/brier_reward": 0.8442665100097656,
|
|
"rewards/confidence_uniqueness_reward": 0.937886118888855,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0017726486083120107,
|
|
"rewards/frontier_coverage_1": 0.12525941878557206,
|
|
"rewards/frontier_coverage_10": 0.12525941878557206,
|
|
"rewards/frontier_coverage_15": 0.10406550467014312,
|
|
"rewards/frontier_coverage_20": 0.07677052170038223,
|
|
"rewards/frontier_coverage_25": 0.09108839333057403,
|
|
"rewards/frontier_coverage_5": 0.12525941878557206,
|
|
"rewards/frontier_ece_reward": 0.005490910448133946,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.074908447265625,
|
|
"signal/accuracy_reward/group_std_mean": 0.09916009157896041,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0374542236328125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0374542236328125,
|
|
"signal/advantage_abs_mean": 0.04598864167928696,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04598864167928696,
|
|
"signal/advantage_pre_scale_std": 0.09303600341081619,
|
|
"signal/advantage_std": 0.09303600341081619,
|
|
"signal/brier_reward/centered_abs_mean": 0.09707934856414795,
|
|
"signal/brier_reward/group_std_mean": 0.12730673998594283,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012134918570518493,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012134918570518493,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028879277408123016,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03636922165751457,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003609909676015377,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003609909676015377,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014877181965857744,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0023924733977764845,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6630155844031833e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6630155844031833e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1195068359375,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15731086134910582,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002139172307215631,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002139172307215631,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1195068359375,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15731086134910582,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002139172307215631,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002139172307215631,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.09091014117002487,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1199584573507309,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016272914595901965,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016272914595901965,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05923491641879082,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0771937534213066,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010603050119243561,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010603050119243561,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.053304193913936614,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0676953986287117,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009541450766846537,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009541450766846537,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1195068359375,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15731086134910582,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002139172307215631,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002139172307215631,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.004334048368036747,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0055789993144571785,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005417560460045934,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005417560460045934,
|
|
"step": 220
|
|
},
|
|
{
|
|
"calibration/aurc": 0.14470123646878835,
|
|
"calibration/batch_distribution_entropy": 0.8739225583129173,
|
|
"calibration/buffer_distribution_entropy": 0.9555200094663506,
|
|
"calibration/confidence_entropy": 0.391513768008919,
|
|
"calibration/coverage@0%": 0.0875,
|
|
"calibration/coverage@1%": 0.0875,
|
|
"calibration/coverage@10%": 0.5484375,
|
|
"calibration/coverage@15%": 0.675,
|
|
"calibration/coverage@20%": 0.72890625,
|
|
"calibration/coverage@25%": 0.77265625,
|
|
"calibration/coverage@30%": 0.81953125,
|
|
"calibration/coverage@5%": 0.31953125,
|
|
"calibration/ece": 0.15345659905795017,
|
|
"calibration/mean_confidence": 0.6121585571920498,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 562.2,
|
|
"completions/max_terminated_length": 562.2,
|
|
"completions/mean_length": 214.69287109375,
|
|
"completions/mean_terminated_length": 214.69287109375,
|
|
"completions/min_length": 101.0,
|
|
"completions/min_terminated_length": 101.0,
|
|
"epoch": 0.72,
|
|
"grad_norm": 0.0017858616774901748,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 758894456.0,
|
|
"reward": 1.0460729122161865,
|
|
"reward_std": 0.0672881230711937,
|
|
"rewards/accuracy_reward": 0.62294921875,
|
|
"rewards/brier_reward": 0.8469637155532836,
|
|
"rewards/confidence_uniqueness_reward": 0.935894775390625,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.001939373160712421,
|
|
"rewards/frontier_coverage_1": 0.11669339537620545,
|
|
"rewards/frontier_coverage_10": 0.11649550646543502,
|
|
"rewards/frontier_coverage_15": 0.09009001255035401,
|
|
"rewards/frontier_coverage_20": 0.07372135147452355,
|
|
"rewards/frontier_coverage_25": 0.1068428099155426,
|
|
"rewards/frontier_coverage_5": 0.11669339537620545,
|
|
"rewards/frontier_ece_reward": 0.005344946216791868,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.082562255859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.1146527960896492,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0412811279296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0412811279296875,
|
|
"signal/advantage_abs_mean": 0.04909345507621765,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04909345507621765,
|
|
"signal/advantage_pre_scale_std": 0.10091503411531448,
|
|
"signal/advantage_std": 0.10091503411531448,
|
|
"signal/brier_reward/centered_abs_mean": 0.0935791552066803,
|
|
"signal/brier_reward/group_std_mean": 0.12280905842781067,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011697394400835037,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011697394400835037,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02970266342163086,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.037385367602109906,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037128329277038574,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037128329277038574,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017587365116924047,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027441283222287894,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1481383120990356e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1481383120990356e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.10815362930297852,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.14316221177577973,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019359499448910356,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019359499448910356,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10738050639629364,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1421646863222122,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019221110735088587,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019221110735088587,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0754195511341095,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10019035190343857,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013500099536031484,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013500099536031484,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.052413633465766905,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0678664654493332,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009382039890624583,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009382039890624583,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05627275034785271,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07161930799484253,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010072821867652237,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010072821867652237,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.10815362930297852,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.14316221177577973,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019359499448910356,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019359499448910356,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0037975626531988383,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004970707837492228,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004746953316498548,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004746953316498548,
|
|
"step": 225
|
|
},
|
|
{
|
|
"calibration/aurc": 0.10276625605061121,
|
|
"calibration/batch_distribution_entropy": 0.7755610517250693,
|
|
"calibration/buffer_distribution_entropy": 0.9506196997878165,
|
|
"calibration/confidence_entropy": 0.3668382010462838,
|
|
"calibration/coverage@0%": 0.16171875,
|
|
"calibration/coverage@1%": 0.171875,
|
|
"calibration/coverage@10%": 0.60703125,
|
|
"calibration/coverage@15%": 0.70390625,
|
|
"calibration/coverage@20%": 0.83125,
|
|
"calibration/coverage@25%": 0.925,
|
|
"calibration/coverage@30%": 0.94921875,
|
|
"calibration/coverage@5%": 0.48046875,
|
|
"calibration/ece": 0.09331710770831818,
|
|
"calibration/mean_confidence": 0.7038330452083182,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 523.8,
|
|
"completions/max_terminated_length": 523.8,
|
|
"completions/mean_length": 220.8560546875,
|
|
"completions/mean_terminated_length": 220.8560546875,
|
|
"completions/min_length": 100.4,
|
|
"completions/min_terminated_length": 100.4,
|
|
"epoch": 0.736,
|
|
"grad_norm": 0.002271299483254552,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0003,
|
|
"num_tokens": 776095606.0,
|
|
"reward": 1.0492503643035889,
|
|
"reward_std": 0.06152931973338127,
|
|
"rewards/accuracy_reward": 0.6287109375,
|
|
"rewards/brier_reward": 0.8473744511604309,
|
|
"rewards/confidence_uniqueness_reward": 0.9358478307723999,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0017740631010383368,
|
|
"rewards/frontier_coverage_1": 0.11950143873691559,
|
|
"rewards/frontier_coverage_10": 0.11690339148044586,
|
|
"rewards/frontier_coverage_15": 0.09336267858743667,
|
|
"rewards/frontier_coverage_20": 0.0775704950094223,
|
|
"rewards/frontier_coverage_25": 0.11357748061418534,
|
|
"rewards/frontier_coverage_5": 0.11950143873691559,
|
|
"rewards/frontier_ece_reward": 0.004873855458572507,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0777587890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10373825207352638,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03887939453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03887939453125,
|
|
"signal/advantage_abs_mean": 0.046919054538011554,
|
|
"signal/advantage_pre_scale_abs_mean": 0.046919054538011554,
|
|
"signal/advantage_pre_scale_std": 0.0959189236164093,
|
|
"signal/advantage_std": 0.0959189236164093,
|
|
"signal/brier_reward/centered_abs_mean": 0.0924751952290535,
|
|
"signal/brier_reward/group_std_mean": 0.1207397997379303,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011559399403631687,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011559399403631687,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029533731937408447,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0374361515045166,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003691716492176056,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003691716492176056,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001516377995721996,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002370060421526432,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.714316433412023e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.714316433412023e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11420103460550309,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15096487402915953,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020441983826458452,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020441983826458452,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11071749776601791,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14640629887580872,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019818432396277786,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019818432396277786,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0779910683631897,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.1036263257265091,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013960400596261025,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013960400596261025,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05327008962631226,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06925814524292946,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009535345481708646,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009535345481708646,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05671848207712173,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07240066826343536,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001015260792337358,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001015260792337358,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11420103460550309,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15096487402915953,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020441983826458452,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020441983826458452,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0037134474609047175,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004857833497226238,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004641809326130897,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004641809326130897,
|
|
"step": 230
|
|
},
|
|
{
|
|
"calibration/aurc": 0.15163492450002442,
|
|
"calibration/batch_distribution_entropy": 0.8904229891990763,
|
|
"calibration/buffer_distribution_entropy": 0.9448809655937878,
|
|
"calibration/confidence_entropy": 0.3949929669354096,
|
|
"calibration/coverage@0%": 0.07835171568627451,
|
|
"calibration/coverage@1%": 0.14788296568627451,
|
|
"calibration/coverage@10%": 0.5339736519607843,
|
|
"calibration/coverage@15%": 0.626219362745098,
|
|
"calibration/coverage@20%": 0.7043719362745098,
|
|
"calibration/coverage@25%": 0.7840992647058823,
|
|
"calibration/coverage@30%": 0.8411642156862745,
|
|
"calibration/coverage@5%": 0.3206341911764706,
|
|
"calibration/ece": 0.13040471542112503,
|
|
"calibration/mean_confidence": 0.5816119304946545,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 682.0,
|
|
"completions/max_terminated_length": 494.4,
|
|
"completions/mean_length": 218.71201171875,
|
|
"completions/mean_terminated_length": 218.4533935546875,
|
|
"completions/min_length": 112.0,
|
|
"completions/min_terminated_length": 112.0,
|
|
"epoch": 0.752,
|
|
"grad_norm": 0.001796262338757515,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 793562417.0,
|
|
"reward": 1.0391303777694703,
|
|
"reward_std": 0.06580677628517151,
|
|
"rewards/accuracy_reward": 0.61123046875,
|
|
"rewards/brier_reward": 0.8342607021331787,
|
|
"rewards/confidence_uniqueness_reward": 0.9415198564529419,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0019529166864231228,
|
|
"rewards/frontier_coverage_1": 0.1229474276304245,
|
|
"rewards/frontier_coverage_10": 0.1206100896000862,
|
|
"rewards/frontier_coverage_15": 0.09027208015322685,
|
|
"rewards/frontier_coverage_20": 0.07027497664093971,
|
|
"rewards/frontier_coverage_25": 0.09396415501832962,
|
|
"rewards/frontier_coverage_5": 0.1229474276304245,
|
|
"rewards/frontier_ece_reward": 0.004472200945019722,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080157470703125,
|
|
"signal/accuracy_reward/group_std_mean": 0.10887984037399293,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400787353515625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0400787353515625,
|
|
"signal/advantage_abs_mean": 0.04903002083301544,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04903002083301544,
|
|
"signal/advantage_pre_scale_std": 0.1000540629029274,
|
|
"signal/advantage_std": 0.1000540629029274,
|
|
"signal/brier_reward/centered_abs_mean": 0.09556291699409485,
|
|
"signal/brier_reward/group_std_mean": 0.124751777946949,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011945364624261856,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011945364624261856,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026840757578611374,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.033848896622657776,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033550946973264217,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033550946973264217,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001602224470116198,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002494157268665731,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8679816023213788e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8679816023213788e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11630584448575973,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15271863341331482,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002081874618306756,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002081874618306756,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11303210407495498,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14861542731523514,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002023274498060346,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002023274498060346,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07724076434969902,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10189598947763442,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013826095964759588,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013826095964759588,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05278810262680054,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06799793317914009,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009449069970287382,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009449069970287382,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05864310711622238,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0744215801358223,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010497116250917315,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010497116250917315,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11630584448575973,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15271863341331482,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002081874618306756,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002081874618306756,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0036135178990662096,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0046929454430937765,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004516897373832762,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004516897373832762,
|
|
"step": 235
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1354460653145263,
|
|
"calibration/batch_distribution_entropy": 0.908943110934102,
|
|
"calibration/buffer_distribution_entropy": 0.940215096807232,
|
|
"calibration/confidence_entropy": 0.4264768475786309,
|
|
"calibration/coverage@0%": 0.24072610294117647,
|
|
"calibration/coverage@1%": 0.2852573529411765,
|
|
"calibration/coverage@10%": 0.5294638480392158,
|
|
"calibration/coverage@15%": 0.63828125,
|
|
"calibration/coverage@20%": 0.7453125,
|
|
"calibration/coverage@25%": 0.81015625,
|
|
"calibration/coverage@30%": 0.871875,
|
|
"calibration/coverage@5%": 0.44806985294117646,
|
|
"calibration/ece": 0.16837747358087166,
|
|
"calibration/mean_confidence": 0.5540928773798913,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.000390625,
|
|
"completions/max_length": 1317.8,
|
|
"completions/max_terminated_length": 464.0,
|
|
"completions/mean_length": 222.9625,
|
|
"completions/mean_terminated_length": 222.44960021972656,
|
|
"completions/min_length": 104.6,
|
|
"completions/min_terminated_length": 104.6,
|
|
"epoch": 0.768,
|
|
"grad_norm": 0.0016375478589907289,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0011,
|
|
"num_tokens": 810778257.0,
|
|
"reward": 1.0248207330703736,
|
|
"reward_std": 0.061895406991243365,
|
|
"rewards/accuracy_reward": 0.576953125,
|
|
"rewards/brier_reward": 0.8413738012313843,
|
|
"rewards/confidence_uniqueness_reward": 0.9406145691871644,
|
|
"rewards/format_reward": 0.99951171875,
|
|
"rewards/frontier_aurc_reward": -0.001507855043746531,
|
|
"rewards/frontier_coverage_1": 0.1511134535074234,
|
|
"rewards/frontier_coverage_10": 0.14939744472503663,
|
|
"rewards/frontier_coverage_15": 0.11022275984287262,
|
|
"rewards/frontier_coverage_20": 0.08167696744203568,
|
|
"rewards/frontier_coverage_25": 0.09813316464424134,
|
|
"rewards/frontier_coverage_5": 0.1511134535074234,
|
|
"rewards/frontier_ece_reward": 0.00472887079231441,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07352294921875,
|
|
"signal/accuracy_reward/group_std_mean": 0.09980905205011367,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.709375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.036761474609375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.036761474609375,
|
|
"signal/advantage_abs_mean": 0.045818436145782473,
|
|
"signal/advantage_pre_scale_abs_mean": 0.045818436145782473,
|
|
"signal/advantage_pre_scale_std": 0.0930859088897705,
|
|
"signal/advantage_std": 0.0930859088897705,
|
|
"signal/brier_reward/centered_abs_mean": 0.09857990890741349,
|
|
"signal/brier_reward/group_std_mean": 0.13005568087100983,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012322488613426686,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012322488613426686,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026614753901958464,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.034846174716949466,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003326844237744808,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003326844237744808,
|
|
"signal/format_reward/centered_abs_mean": 0.000946044921875,
|
|
"signal/format_reward/group_std_mean": 0.0027621358167380095,
|
|
"signal/format_reward/group_zero_std_frac": 0.984375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011487239389680326,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0017788737313821912,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0562157442327588e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0562157442327588e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1333732545375824,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17638799846172332,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023873811587691307,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023873811587691307,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.13058110177516938,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.17269828617572786,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023374016396701335,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023374016396701335,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08957252502441407,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11884426325559616,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016033481108024717,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016033481108024717,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.06021819338202476,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0782925844192505,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010779056698083877,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010779056698083877,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.057765302062034604,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07384001463651657,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001033998851198703,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001033998851198703,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1333732545375824,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17638799846172332,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023873811587691307,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023873811587691307,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0036720467731356623,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004740559495985508,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004590058466419578,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004590058466419578,
|
|
"step": 240
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1780989063016482,
|
|
"calibration/batch_distribution_entropy": 0.8732530039163613,
|
|
"calibration/buffer_distribution_entropy": 0.9368918455830914,
|
|
"calibration/confidence_entropy": 0.37058566571107476,
|
|
"calibration/coverage@0%": 0.16484375,
|
|
"calibration/coverage@1%": 0.1953125,
|
|
"calibration/coverage@10%": 0.44375,
|
|
"calibration/coverage@15%": 0.503125,
|
|
"calibration/coverage@20%": 0.5875,
|
|
"calibration/coverage@25%": 0.71484375,
|
|
"calibration/coverage@30%": 0.7625,
|
|
"calibration/coverage@5%": 0.315625,
|
|
"calibration/ece": 0.10408663335191717,
|
|
"calibration/mean_confidence": 0.5442599486038305,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 693.4,
|
|
"completions/max_terminated_length": 591.2,
|
|
"completions/mean_length": 223.282421875,
|
|
"completions/mean_terminated_length": 223.15485534667968,
|
|
"completions/min_length": 110.4,
|
|
"completions/min_terminated_length": 110.4,
|
|
"epoch": 0.784,
|
|
"grad_norm": 0.0017808079719543457,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 828239037.0,
|
|
"reward": 1.0469671964645386,
|
|
"reward_std": 0.0657818466424942,
|
|
"rewards/accuracy_reward": 0.63125,
|
|
"rewards/brier_reward": 0.8252038955688477,
|
|
"rewards/confidence_uniqueness_reward": 0.9421940207481384,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0015878735110163688,
|
|
"rewards/frontier_coverage_1": 0.10298990458250046,
|
|
"rewards/frontier_coverage_10": 0.10325277298688888,
|
|
"rewards/frontier_coverage_15": 0.08138184025883674,
|
|
"rewards/frontier_coverage_20": 0.06994581818580628,
|
|
"rewards/frontier_coverage_25": 0.10279036164283753,
|
|
"rewards/frontier_coverage_5": 0.10298990458250046,
|
|
"rewards/frontier_ece_reward": 0.0036763294599950315,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08505859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11480707228183747,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042529296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042529296875,
|
|
"signal/advantage_abs_mean": 0.04915754199028015,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04915754199028015,
|
|
"signal/advantage_pre_scale_std": 0.09866253137588502,
|
|
"signal/advantage_std": 0.09866253137588502,
|
|
"signal/brier_reward/centered_abs_mean": 0.1016099825501442,
|
|
"signal/brier_reward/group_std_mean": 0.13323958665132524,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012701247818768024,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012701247818768024,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02635921761393547,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.033456063643097875,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032949022017419336,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032949022017419336,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001376147347036749,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002203846746124327,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.463303608237766e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.463303608237766e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13170208930969238,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1737958937883377,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023574673570692537,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023574673570692537,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.12848464101552964,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.16970953047275544,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022998749278485774,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022998749278485774,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.08622983396053314,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.11427305340766906,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015435139182955026,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015435139182955026,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05881091207265854,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0768322467803955,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010527152917347848,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010527152917347848,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.05912056043744087,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.07621604949235916,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010582579649053513,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010582579649053513,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13170208930969238,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1737958937883377,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023574673570692537,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023574673570692537,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003539442550390959,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004590986762195826,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044243031879886985,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044243031879886985,
|
|
"step": 245
|
|
},
|
|
{
|
|
"calibration/aurc": 0.26110712774742,
|
|
"calibration/batch_distribution_entropy": 0.8637357131440693,
|
|
"calibration/buffer_distribution_entropy": 0.9338101197498352,
|
|
"calibration/confidence_entropy": 0.41911873533648614,
|
|
"calibration/coverage@0%": 0.025,
|
|
"calibration/coverage@1%": 0.025,
|
|
"calibration/coverage@10%": 0.30546875,
|
|
"calibration/coverage@15%": 0.34453125,
|
|
"calibration/coverage@20%": 0.54609375,
|
|
"calibration/coverage@25%": 0.5859375,
|
|
"calibration/coverage@30%": 0.63671875,
|
|
"calibration/coverage@5%": 0.078125,
|
|
"calibration/ece": 0.1646667575702236,
|
|
"calibration/mean_confidence": 0.5018031308830292,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 694.4,
|
|
"completions/max_terminated_length": 509.2,
|
|
"completions/mean_length": 220.36611328125,
|
|
"completions/mean_terminated_length": 220.2374237060547,
|
|
"completions/min_length": 105.8,
|
|
"completions/min_terminated_length": 105.8,
|
|
"epoch": 0.8,
|
|
"grad_norm": 0.002036831108853221,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 845506146.0,
|
|
"reward": 1.0645583629608155,
|
|
"reward_std": 0.06164888888597488,
|
|
"rewards/accuracy_reward": 0.6568359375,
|
|
"rewards/brier_reward": 0.8579505681991577,
|
|
"rewards/confidence_uniqueness_reward": 0.9386770725250244,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.001275635720230639,
|
|
"rewards/frontier_coverage_1": 0.11075811237096786,
|
|
"rewards/frontier_coverage_10": 0.10875446647405625,
|
|
"rewards/frontier_coverage_15": 0.08313208520412445,
|
|
"rewards/frontier_coverage_20": 0.07641463130712509,
|
|
"rewards/frontier_coverage_25": 0.13115044236183165,
|
|
"rewards/frontier_coverage_5": 0.11075811237096786,
|
|
"rewards/frontier_ece_reward": 0.004146079532802105,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0808837890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10505216717720031,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04044189453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04044189453125,
|
|
"signal/advantage_abs_mean": 0.04755032882094383,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04755032882094383,
|
|
"signal/advantage_pre_scale_std": 0.09882079064846039,
|
|
"signal/advantage_std": 0.09882079064846039,
|
|
"signal/brier_reward/centered_abs_mean": 0.08948185741901397,
|
|
"signal/brier_reward/group_std_mean": 0.1173609048128128,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011185232177376747,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011185232177376747,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028289894759654998,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.035136304795742035,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035362368449568748,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035362368449568748,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013092580833472312,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021080786129459737,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.343571886740392e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.343571886740392e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11353515535593033,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15011467039585114,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020322792232036592,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020322792232036592,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10932374000549316,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14465901702642442,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001956894900649786,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001956894900649786,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07167089506983756,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0952614426612854,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001282908977009356,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001282908977009356,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05188070461153984,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0674702912569046,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009286645916290581,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009286645916290581,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06000246405601502,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0765003427863121,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010740441037341952,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010740441037341952,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11353515535593033,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15011467039585114,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020322792232036592,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020322792232036592,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0031503901816904547,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004117331793531775,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00039379877271130683,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00039379877271130683,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"eval_calibration/aurc": 0.4328469329018436,
|
|
"eval_calibration/batch_distribution_entropy": 0.9017827126146071,
|
|
"eval_calibration/buffer_distribution_entropy": 0.933450971645078,
|
|
"eval_calibration/confidence_entropy": 0.4392779969401228,
|
|
"eval_calibration/coverage@0%": 0.0625,
|
|
"eval_calibration/coverage@1%": 0.0625,
|
|
"eval_calibration/coverage@10%": 0.0625,
|
|
"eval_calibration/coverage@15%": 0.0625,
|
|
"eval_calibration/coverage@20%": 0.25,
|
|
"eval_calibration/coverage@25%": 0.3125,
|
|
"eval_calibration/coverage@30%": 0.34375,
|
|
"eval_calibration/coverage@5%": 0.0625,
|
|
"eval_calibration/ece": 0.214326634140625,
|
|
"eval_calibration/mean_confidence": 0.5412016341406249,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 416.0,
|
|
"eval_completions/max_terminated_length": 416.0,
|
|
"eval_completions/mean_length": 219.94239044189453,
|
|
"eval_completions/mean_terminated_length": 219.94239044189453,
|
|
"eval_completions/min_length": 115.5,
|
|
"eval_completions/min_terminated_length": 115.5,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 845506146.0,
|
|
"eval_reward": 0.9467860460281372,
|
|
"eval_reward_std": 0.24670489132404327,
|
|
"eval_rewards/accuracy_reward": 0.44140625,
|
|
"eval_rewards/brier_reward": 0.7873809337615967,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.890869140625,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.004038012819364667,
|
|
"eval_rewards/frontier_coverage_1": 0.20367811620235443,
|
|
"eval_rewards/frontier_coverage_10": 0.19325406849384308,
|
|
"eval_rewards/frontier_coverage_15": 0.12953777611255646,
|
|
"eval_rewards/frontier_coverage_20": 0.08314738422632217,
|
|
"eval_rewards/frontier_coverage_25": 0.06818825379014015,
|
|
"eval_rewards/frontier_coverage_5": 0.20367811620235443,
|
|
"eval_rewards/frontier_ece_reward": 0.004763010889291763,
|
|
"eval_runtime": 10.7531,
|
|
"eval_samples_per_second": 46.498,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.473388671875,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.4939229190349579,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2366943359375,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2366943359375,
|
|
"eval_signal/advantage_abs_mean": 0.22974882274866104,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.22974882274866104,
|
|
"eval_signal/advantage_pre_scale_std": 0.2435239553451538,
|
|
"eval_signal/advantage_std": 0.2435239553451538,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.23106026649475098,
|
|
"eval_signal/brier_reward/group_std_mean": 0.2880419045686722,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028882533311843872,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.028882533311843872,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0494232177734375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.058502499014139175,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0061779022216796875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0061779022216796875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00543490145355463,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.010733058210462332,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.728474105941132e-05,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.728474105941132e-05,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.34148095548152924,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.4213644117116928,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006112508941441774,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006112508941441774,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3236210346221924,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.4000513255596161,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005792815936729312,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005792815936729312,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.20580045133829117,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.2604397386312485,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036838280502706766,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036838280502706766,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.11519244313240051,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.1483083888888359,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020619446877390146,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020619446877390146,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1481623351573944,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.19164805114269257,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00265210575889796,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00265210575889796,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.34148095548152924,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.4213644117116928,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006112508941441774,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006112508941441774,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006984395207837224,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.008835344575345516,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000873049400979653,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000873049400979653,
|
|
"eval_steps_per_second": 0.186,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"step": 250,
|
|
"train_probe_calibration/aurc": 0.12986536721544725,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.811839844274961,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.933494045269648,
|
|
"train_probe_calibration/confidence_entropy": 0.354105295763641,
|
|
"train_probe_calibration/coverage@0%": 0.140625,
|
|
"train_probe_calibration/coverage@1%": 0.140625,
|
|
"train_probe_calibration/coverage@10%": 0.609375,
|
|
"train_probe_calibration/coverage@15%": 0.765625,
|
|
"train_probe_calibration/coverage@20%": 0.828125,
|
|
"train_probe_calibration/coverage@25%": 0.90625,
|
|
"train_probe_calibration/coverage@30%": 0.921875,
|
|
"train_probe_calibration/coverage@5%": 0.484375,
|
|
"train_probe_calibration/ece": 0.13531250000000003,
|
|
"train_probe_calibration/mean_confidence": 0.624875,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 376.0,
|
|
"train_probe_completions/max_terminated_length": 376.0,
|
|
"train_probe_completions/mean_length": 217.32789611816406,
|
|
"train_probe_completions/mean_terminated_length": 217.32789611816406,
|
|
"train_probe_completions/min_length": 120.5,
|
|
"train_probe_completions/min_terminated_length": 120.5,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 845506146.0,
|
|
"train_probe_reward": 1.0561645030975342,
|
|
"train_probe_reward_std": 0.2325623854994774,
|
|
"train_probe_rewards/accuracy_reward": 0.654296875,
|
|
"train_probe_rewards/brier_reward": 0.8493243455886841,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.889404296875,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.001908503647428006,
|
|
"train_probe_rewards/frontier_coverage_1": 0.11155515164136887,
|
|
"train_probe_rewards/frontier_coverage_10": 0.10592306032776833,
|
|
"train_probe_rewards/frontier_coverage_15": 0.08040037006139755,
|
|
"train_probe_rewards/frontier_coverage_20": 0.0775928758084774,
|
|
"train_probe_rewards/frontier_coverage_25": 0.1379874050617218,
|
|
"train_probe_rewards/frontier_coverage_5": 0.11155515164136887,
|
|
"train_probe_rewards/frontier_ece_reward": 0.004171320935711265,
|
|
"train_probe_runtime": 10.1866,
|
|
"train_probe_samples_per_second": 49.084,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4449462890625,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.4788653701543808,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22247314453125,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22247314453125,
|
|
"train_probe_signal/advantage_abs_mean": 0.21122215688228607,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.21122215688228607,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.22976724058389664,
|
|
"train_probe_signal/advantage_std": 0.22976724058389664,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.18194539844989777,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.245933398604393,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02274317480623722,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.02274317480623722,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0509185791015625,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.061591994017362595,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063648223876953125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063648223876953125,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.003361418261192739,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.006412317277863622,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.016938641550951e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.016938641550951e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3020322024822235,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4129178822040558,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0054063762072473764,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0054063762072473764,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.2831447720527649,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.3897576928138733,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0050682914443314075,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0050682914443314075,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1778106540441513,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.2535991668701172,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031828106148168445,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031828106148168445,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1011722981929779,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.14043454825878143,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018109841039404273,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018109841039404273,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.14492832124233246,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.17802315205335617,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002594216726720333,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002594216726720333,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3020322024822235,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4129178822040558,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0054063762072473764,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0054063762072473764,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.006162431091070175,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.008223664714023471,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007703038863837719,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007703038863837719,
|
|
"train_probe_steps_per_second": 0.196
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2613374077664524,
|
|
"calibration/batch_distribution_entropy": 0.8562545855944862,
|
|
"calibration/buffer_distribution_entropy": 0.9332085807167052,
|
|
"calibration/confidence_entropy": 0.36503547135583225,
|
|
"calibration/coverage@0%": 0.0875,
|
|
"calibration/coverage@1%": 0.0921875,
|
|
"calibration/coverage@10%": 0.27109375,
|
|
"calibration/coverage@15%": 0.346875,
|
|
"calibration/coverage@20%": 0.40078125,
|
|
"calibration/coverage@25%": 0.44921875,
|
|
"calibration/coverage@30%": 0.6296875,
|
|
"calibration/coverage@5%": 0.17265625,
|
|
"calibration/ece": 0.14208048120424616,
|
|
"calibration/mean_confidence": 0.5988117062957538,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 768.4,
|
|
"completions/max_terminated_length": 564.4,
|
|
"completions/mean_length": 213.418359375,
|
|
"completions/mean_terminated_length": 213.2895263671875,
|
|
"completions/min_length": 100.2,
|
|
"completions/min_terminated_length": 100.2,
|
|
"epoch": 0.816,
|
|
"grad_norm": 0.0023187189362943172,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0006,
|
|
"num_tokens": 862790718.0,
|
|
"reward": 1.053348708152771,
|
|
"reward_std": 0.06448897942900658,
|
|
"rewards/accuracy_reward": 0.64423828125,
|
|
"rewards/brier_reward": 0.8300428271293641,
|
|
"rewards/confidence_uniqueness_reward": 0.9373907327651978,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002119234437122941,
|
|
"rewards/frontier_coverage_1": 0.09330451190471649,
|
|
"rewards/frontier_coverage_10": 0.09116496592760086,
|
|
"rewards/frontier_coverage_15": 0.0719268336892128,
|
|
"rewards/frontier_coverage_20": 0.07347770035266876,
|
|
"rewards/frontier_coverage_25": 0.13299526423215866,
|
|
"rewards/frontier_coverage_5": 0.09330451190471649,
|
|
"rewards/frontier_ece_reward": 0.0034532100893557073,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.082476806640625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10868191868066787,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0412384033203125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0412384033203125,
|
|
"signal/advantage_abs_mean": 0.048784293979406354,
|
|
"signal/advantage_pre_scale_abs_mean": 0.048784293979406354,
|
|
"signal/advantage_pre_scale_std": 0.09951501935720444,
|
|
"signal/advantage_std": 0.09951501935720444,
|
|
"signal/brier_reward/centered_abs_mean": 0.10197662115097046,
|
|
"signal/brier_reward/group_std_mean": 0.13159122467041015,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012747077643871308,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012747077643871308,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02778756096959114,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03503857851028443,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034734451211988924,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034734451211988924,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020503590581938624,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.003305292781442404,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6701426142826674e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6701426142826674e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12355931252241134,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1595274031162262,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022117116721346976,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022117116721346976,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11546845138072967,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.1491788625717163,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020668851910158994,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020668851910158994,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07564910650253295,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09829453229904175,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013541190419346094,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013541190419346094,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05537274181842804,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0709018051624298,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000991172017529607,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000991172017529607,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06835410594940186,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08684322088956833,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012235384434461593,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012235384434461593,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12355931252241134,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1595274031162262,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022117116721346976,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022117116721346976,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0032832324504852295,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0042684660758823155,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004104040563106537,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004104040563106537,
|
|
"step": 255
|
|
},
|
|
{
|
|
"calibration/aurc": 0.29902088331269355,
|
|
"calibration/batch_distribution_entropy": 0.8771544045312076,
|
|
"calibration/buffer_distribution_entropy": 0.9328252547851499,
|
|
"calibration/confidence_entropy": 0.3984631748431898,
|
|
"calibration/coverage@0%": 0.18125,
|
|
"calibration/coverage@1%": 0.18359375,
|
|
"calibration/coverage@10%": 0.2484375,
|
|
"calibration/coverage@15%": 0.26953125,
|
|
"calibration/coverage@20%": 0.37109375,
|
|
"calibration/coverage@25%": 0.5015625,
|
|
"calibration/coverage@30%": 0.58515625,
|
|
"calibration/coverage@5%": 0.21953125,
|
|
"calibration/ece": 0.14786460040812144,
|
|
"calibration/mean_confidence": 0.5620528753555174,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 467.8,
|
|
"completions/max_terminated_length": 467.8,
|
|
"completions/mean_length": 210.1916015625,
|
|
"completions/mean_terminated_length": 210.1916015625,
|
|
"completions/min_length": 95.4,
|
|
"completions/min_terminated_length": 95.4,
|
|
"epoch": 0.832,
|
|
"grad_norm": 0.001390106393955648,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 879951432.0,
|
|
"reward": 1.0441203117370605,
|
|
"reward_std": 0.05937432199716568,
|
|
"rewards/accuracy_reward": 0.61279296875,
|
|
"rewards/brier_reward": 0.85388263463974,
|
|
"rewards/confidence_uniqueness_reward": 0.9362106323242188,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0018755205674096942,
|
|
"rewards/frontier_coverage_1": 0.1422416090965271,
|
|
"rewards/frontier_coverage_10": 0.13426189720630646,
|
|
"rewards/frontier_coverage_15": 0.09782664477825165,
|
|
"rewards/frontier_coverage_20": 0.09015188366174698,
|
|
"rewards/frontier_coverage_25": 0.1458705931901932,
|
|
"rewards/frontier_coverage_5": 0.1422416090965271,
|
|
"rewards/frontier_ece_reward": 0.004194558784365654,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.071063232421875,
|
|
"signal/accuracy_reward/group_std_mean": 0.09939071238040924,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.7,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0355316162109375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0355316162109375,
|
|
"signal/advantage_abs_mean": 0.043323104828596117,
|
|
"signal/advantage_pre_scale_abs_mean": 0.043323104828596117,
|
|
"signal/advantage_pre_scale_std": 0.09220470041036606,
|
|
"signal/advantage_std": 0.09220470041036606,
|
|
"signal/brier_reward/centered_abs_mean": 0.08896346092224121,
|
|
"signal/brier_reward/group_std_mean": 0.11624416410923004,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011120432615280151,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011120432615280151,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027381277084350585,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03401793241500854,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003422659635543823,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003422659635543823,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018187327776104211,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029950566589832307,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.255531628383323e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.255531628383323e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11554279178380966,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.1498140126466751,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002068215887993574,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002068215887993574,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10581835210323334,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13711453676223756,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018941484624519945,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018941484624519945,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07099459692835808,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09155822247266769,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012708032154478133,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012708032154478133,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05304303243756294,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.066974838078022,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009494703030213713,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009494703030213713,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06249256357550621,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08098939657211304,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011186168296262622,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011186168296262622,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11554279178380966,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.1498140126466751,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002068215887993574,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002068215887993574,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003021185612305999,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003916465956717729,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00037764820153824986,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00037764820153824986,
|
|
"step": 260
|
|
},
|
|
{
|
|
"calibration/aurc": 0.16050257914279836,
|
|
"calibration/batch_distribution_entropy": 0.8431910556560627,
|
|
"calibration/buffer_distribution_entropy": 0.931636889634975,
|
|
"calibration/confidence_entropy": 0.3986462888374449,
|
|
"calibration/coverage@0%": 0.12734375,
|
|
"calibration/coverage@1%": 0.1640625,
|
|
"calibration/coverage@10%": 0.41015625,
|
|
"calibration/coverage@15%": 0.50234375,
|
|
"calibration/coverage@20%": 0.76328125,
|
|
"calibration/coverage@25%": 0.8515625,
|
|
"calibration/coverage@30%": 0.93046875,
|
|
"calibration/coverage@5%": 0.26171875,
|
|
"calibration/ece": 0.14965263001674728,
|
|
"calibration/mean_confidence": 0.6677016463180183,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 720.6,
|
|
"completions/max_terminated_length": 503.2,
|
|
"completions/mean_length": 205.6150390625,
|
|
"completions/mean_terminated_length": 205.48502502441406,
|
|
"completions/min_length": 99.0,
|
|
"completions/min_terminated_length": 99.0,
|
|
"epoch": 0.848,
|
|
"grad_norm": 0.0016696910606697202,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 897071298.0,
|
|
"reward": 1.0344059467315674,
|
|
"reward_std": 0.05915949493646622,
|
|
"rewards/accuracy_reward": 0.5978515625,
|
|
"rewards/brier_reward": 0.8411754608154297,
|
|
"rewards/confidence_uniqueness_reward": 0.9387550115585327,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.0017502504400908948,
|
|
"rewards/frontier_coverage_1": 0.13980764299631118,
|
|
"rewards/frontier_coverage_10": 0.12554386407136917,
|
|
"rewards/frontier_coverage_15": 0.09122110307216644,
|
|
"rewards/frontier_coverage_20": 0.0802506908774376,
|
|
"rewards/frontier_coverage_25": 0.12608129382133484,
|
|
"rewards/frontier_coverage_5": 0.13980764299631118,
|
|
"rewards/frontier_ece_reward": 0.00392393465153873,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0715087890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.09724359661340713,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03575439453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03575439453125,
|
|
"signal/advantage_abs_mean": 0.044149909913539884,
|
|
"signal/advantage_pre_scale_abs_mean": 0.044149909913539884,
|
|
"signal/advantage_pre_scale_std": 0.0919294998049736,
|
|
"signal/advantage_std": 0.0919294998049736,
|
|
"signal/brier_reward/centered_abs_mean": 0.0906279519200325,
|
|
"signal/brier_reward/group_std_mean": 0.12001040577888489,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011328493990004063,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011328493990004063,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026908674091100693,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.0335762545466423,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033635842613875867,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033635842613875867,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001596922567114234,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0026214892510324716,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.858491352526471e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.858491352526471e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12007757127285004,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16089081168174743,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002149388426914811,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002149388426914811,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.1089574933052063,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14638633131980897,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019503391114994884,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019503391114994884,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07160564810037613,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0962700754404068,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012817410985007881,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012817410985007881,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0523877888917923,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06873219013214112,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009377413894981146,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009377413894981146,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06207484975457191,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08022152930498123,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011111397529020906,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011111397529020906,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12007757127285004,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16089081168174743,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002149388426914811,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002149388426914811,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0029800481628626586,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003989115683361888,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003725060203578323,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003725060203578323,
|
|
"step": 265
|
|
},
|
|
{
|
|
"calibration/aurc": 0.1268785574113444,
|
|
"calibration/batch_distribution_entropy": 0.8655835602170416,
|
|
"calibration/buffer_distribution_entropy": 0.930216643410773,
|
|
"calibration/confidence_entropy": 0.38734716180223816,
|
|
"calibration/coverage@0%": 0.37265625,
|
|
"calibration/coverage@1%": 0.5234375,
|
|
"calibration/coverage@10%": 0.6515625,
|
|
"calibration/coverage@15%": 0.684375,
|
|
"calibration/coverage@20%": 0.7078125,
|
|
"calibration/coverage@25%": 0.7296875,
|
|
"calibration/coverage@30%": 0.7578125,
|
|
"calibration/coverage@5%": 0.60625,
|
|
"calibration/ece": 0.18136702633101748,
|
|
"calibration/mean_confidence": 0.6521458386893639,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 934.0,
|
|
"completions/max_terminated_length": 527.6,
|
|
"completions/mean_length": 207.2068359375,
|
|
"completions/mean_terminated_length": 206.9475830078125,
|
|
"completions/min_length": 102.4,
|
|
"completions/min_terminated_length": 102.4,
|
|
"epoch": 0.864,
|
|
"grad_norm": 0.002203070791438222,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0005,
|
|
"num_tokens": 914179912.0,
|
|
"reward": 1.0581453800201417,
|
|
"reward_std": 0.063059052079916,
|
|
"rewards/accuracy_reward": 0.65205078125,
|
|
"rewards/brier_reward": 0.836116099357605,
|
|
"rewards/confidence_uniqueness_reward": 0.9410740494728088,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0015314666437916459,
|
|
"rewards/frontier_coverage_1": 0.09230080395936965,
|
|
"rewards/frontier_coverage_10": 0.08503075465559959,
|
|
"rewards/frontier_coverage_15": 0.06719348207116127,
|
|
"rewards/frontier_coverage_20": 0.07197408005595207,
|
|
"rewards/frontier_coverage_25": 0.1352065086364746,
|
|
"rewards/frontier_coverage_5": 0.09230080395936965,
|
|
"rewards/frontier_ece_reward": 0.002868586964905262,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.080035400390625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10791658908128739,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400177001953125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0400177001953125,
|
|
"signal/advantage_abs_mean": 0.04706686735153198,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04706686735153198,
|
|
"signal/advantage_pre_scale_std": 0.09770552664995194,
|
|
"signal/advantage_std": 0.09770552664995194,
|
|
"signal/brier_reward/centered_abs_mean": 0.09740178287029266,
|
|
"signal/brier_reward/group_std_mean": 0.12451921701431275,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012175222858786583,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012175222858786583,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025182069465517997,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.031810386851429936,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031477586831897496,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031477586831897496,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001567194890230894,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025726008461788297,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8052787092747168e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8052787092747168e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12118019163608551,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15813361406326293,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002169125364162028,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002169125364162028,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10587679147720337,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13836515247821807,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018951945239678025,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018951945239678025,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07084731981158257,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0918091282248497,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012681669555604457,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012681669555604457,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05579846650362015,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07049720138311386,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009987925528548657,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009987925528548657,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06971824020147324,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08754518479108811,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012479565106332303,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012479565106332303,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12118019163608551,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15813361406326293,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002169125364162028,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002169125364162028,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0029707029927521942,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0038403474260121583,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003713378740940243,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003713378740940243,
|
|
"step": 270
|
|
},
|
|
{
|
|
"calibration/aurc": 0.273963740303666,
|
|
"calibration/batch_distribution_entropy": 0.8652895848583995,
|
|
"calibration/buffer_distribution_entropy": 0.9298649992702075,
|
|
"calibration/confidence_entropy": 0.37132087278349013,
|
|
"calibration/coverage@0%": 0.10390625,
|
|
"calibration/coverage@1%": 0.125,
|
|
"calibration/coverage@10%": 0.25390625,
|
|
"calibration/coverage@15%": 0.2765625,
|
|
"calibration/coverage@20%": 0.38828125,
|
|
"calibration/coverage@25%": 0.4703125,
|
|
"calibration/coverage@30%": 0.5703125,
|
|
"calibration/coverage@5%": 0.18046875,
|
|
"calibration/ece": 0.16565321925298856,
|
|
"calibration/mean_confidence": 0.5717535064279697,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 677.8,
|
|
"completions/max_terminated_length": 465.0,
|
|
"completions/mean_length": 201.7654296875,
|
|
"completions/mean_terminated_length": 201.63529052734376,
|
|
"completions/min_length": 101.4,
|
|
"completions/min_terminated_length": 101.4,
|
|
"epoch": 0.88,
|
|
"grad_norm": 0.001780420308932662,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0004,
|
|
"num_tokens": 931393062.0,
|
|
"reward": 1.0194225072860719,
|
|
"reward_std": 0.0662582591176033,
|
|
"rewards/accuracy_reward": 0.57275390625,
|
|
"rewards/brier_reward": 0.8221846461296082,
|
|
"rewards/confidence_uniqueness_reward": 0.9409846425056457,
|
|
"rewards/format_reward": 0.99990234375,
|
|
"rewards/frontier_aurc_reward": -0.002486996748484671,
|
|
"rewards/frontier_coverage_1": 0.14020877778530122,
|
|
"rewards/frontier_coverage_10": 0.12404286712408066,
|
|
"rewards/frontier_coverage_15": 0.08958611041307449,
|
|
"rewards/frontier_coverage_20": 0.07855436801910401,
|
|
"rewards/frontier_coverage_25": 0.11499525308609009,
|
|
"rewards/frontier_coverage_5": 0.14020877778530122,
|
|
"rewards/frontier_ece_reward": 0.00347807789221406,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084307861328125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11367884427309036,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421539306640625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0421539306640625,
|
|
"signal/advantage_abs_mean": 0.05013991966843605,
|
|
"signal/advantage_pre_scale_abs_mean": 0.05013991966843605,
|
|
"signal/advantage_pre_scale_std": 0.10045773237943649,
|
|
"signal/advantage_std": 0.10045773237943649,
|
|
"signal/brier_reward/centered_abs_mean": 0.10293448865413665,
|
|
"signal/brier_reward/group_std_mean": 0.13437058925628662,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012866811081767082,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012866811081767082,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024662094563245772,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03065968081355095,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030827618204057215,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030827618204057215,
|
|
"signal/format_reward/centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/group_std_mean": 0.0005524271633476019,
|
|
"signal/format_reward/group_zero_std_frac": 0.996875,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.002529387711547315,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.004110026638954878,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5276039600139484e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5276039600139484e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13029766380786895,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17371802926063537,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002332328073680401,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002332328073680401,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11453571021556855,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15250465869903565,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020501891616731883,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020501891616731883,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07582000344991684,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10073214769363403,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001357178040780127,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001357178040780127,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0566535584628582,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0733156070113182,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010140986763872207,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010140986763872207,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06789239197969436,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.0869957149028778,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012152737472206354,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012152737472206354,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13029766380786895,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17371802926063537,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002332328073680401,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002332328073680401,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.003146560303866863,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.004122556420043111,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003933200379833579,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003933200379833579,
|
|
"step": 275
|
|
},
|
|
{
|
|
"calibration/aurc": 0.25215917817714806,
|
|
"calibration/batch_distribution_entropy": 0.8739498979185845,
|
|
"calibration/buffer_distribution_entropy": 0.930060328102031,
|
|
"calibration/confidence_entropy": 0.39616207217016336,
|
|
"calibration/coverage@0%": 0.103125,
|
|
"calibration/coverage@1%": 0.13125,
|
|
"calibration/coverage@10%": 0.45390625,
|
|
"calibration/coverage@15%": 0.490625,
|
|
"calibration/coverage@20%": 0.5234375,
|
|
"calibration/coverage@25%": 0.546875,
|
|
"calibration/coverage@30%": 0.56640625,
|
|
"calibration/coverage@5%": 0.22109375,
|
|
"calibration/ece": 0.1740826497875611,
|
|
"calibration/mean_confidence": 0.5997064484977905,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 661.8,
|
|
"completions/max_terminated_length": 443.6,
|
|
"completions/mean_length": 200.58515625,
|
|
"completions/mean_terminated_length": 200.45480651855468,
|
|
"completions/min_length": 96.6,
|
|
"completions/min_terminated_length": 96.6,
|
|
"epoch": 0.896,
|
|
"grad_norm": 0.001766073633916676,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0007,
|
|
"num_tokens": 948557902.0,
|
|
"reward": 1.0397424459457398,
|
|
"reward_std": 0.06280734091997146,
|
|
"rewards/accuracy_reward": 0.6123046875,
|
|
"rewards/brier_reward": 0.8335294604301453,
|
|
"rewards/confidence_uniqueness_reward": 0.94145667552948,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.002396345266606659,
|
|
"rewards/frontier_coverage_1": 0.12290604412555695,
|
|
"rewards/frontier_coverage_10": 0.10812564045190812,
|
|
"rewards/frontier_coverage_15": 0.08217538744211197,
|
|
"rewards/frontier_coverage_20": 0.07664992213249207,
|
|
"rewards/frontier_coverage_25": 0.12757501602172852,
|
|
"rewards/frontier_coverage_5": 0.12290604412555695,
|
|
"rewards/frontier_ece_reward": 0.0031625948380678893,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.07562255859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10868183225393295,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037811279296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037811279296875,
|
|
"signal/advantage_abs_mean": 0.04412608295679092,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04412608295679092,
|
|
"signal/advantage_pre_scale_std": 0.09516832679510116,
|
|
"signal/advantage_std": 0.09516832679510116,
|
|
"signal/brier_reward/centered_abs_mean": 0.09220918267965317,
|
|
"signal/brier_reward/group_std_mean": 0.12125321626663207,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011526147834956646,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011526147834956646,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023380208760499954,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.02966206856071949,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029225260950624943,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029225260950624943,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021038307808339597,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0033327710116282105,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7658572182408534e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7658572182408534e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.11688004732131958,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15271745324134828,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00209215278737247,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00209215278737247,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10004872977733612,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13066715896129608,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001790872262790799,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001790872262790799,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06665360033512116,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08639876991510391,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011930993758141994,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011930993758141994,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05205147713422775,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06605355590581893,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009317214018665255,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009317214018665255,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06579188704490661,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08515497148036957,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011776747182011605,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011776747182011605,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.11688004732131958,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15271745324134828,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00209215278737247,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00209215278737247,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0026048448868095874,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0034122115466743708,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.040625,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003256056108511984,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003256056108511984,
|
|
"step": 280
|
|
},
|
|
{
|
|
"calibration/aurc": 0.2771183806359747,
|
|
"calibration/batch_distribution_entropy": 0.8578019537959676,
|
|
"calibration/buffer_distribution_entropy": 0.9298157417811967,
|
|
"calibration/confidence_entropy": 0.38584274130978946,
|
|
"calibration/coverage@0%": 0.19453125,
|
|
"calibration/coverage@1%": 0.20078125,
|
|
"calibration/coverage@10%": 0.31953125,
|
|
"calibration/coverage@15%": 0.41953125,
|
|
"calibration/coverage@20%": 0.4734375,
|
|
"calibration/coverage@25%": 0.59375,
|
|
"calibration/coverage@30%": 0.62734375,
|
|
"calibration/coverage@5%": 0.25234375,
|
|
"calibration/ece": 0.1462862952874513,
|
|
"calibration/mean_confidence": 0.5480367561507691,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 504.0,
|
|
"completions/max_terminated_length": 504.0,
|
|
"completions/mean_length": 203.267578125,
|
|
"completions/mean_terminated_length": 203.267578125,
|
|
"completions/min_length": 97.6,
|
|
"completions/min_terminated_length": 97.6,
|
|
"epoch": 0.912,
|
|
"grad_norm": 0.0015597037272527814,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 965690658.0,
|
|
"reward": 1.02649986743927,
|
|
"reward_std": 0.06281092613935471,
|
|
"rewards/accuracy_reward": 0.58515625,
|
|
"rewards/brier_reward": 0.8292442321777344,
|
|
"rewards/confidence_uniqueness_reward": 0.946649169921875,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0018648097291588783,
|
|
"rewards/frontier_coverage_1": 0.13054397702217102,
|
|
"rewards/frontier_coverage_10": 0.11399659514427185,
|
|
"rewards/frontier_coverage_15": 0.08423100709915161,
|
|
"rewards/frontier_coverage_20": 0.07361575737595558,
|
|
"rewards/frontier_coverage_25": 0.1144769087433815,
|
|
"rewards/frontier_coverage_5": 0.13054397702217102,
|
|
"rewards/frontier_ece_reward": 0.0030390231404453516,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.079638671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10629072934389114,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0398193359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0398193359375,
|
|
"signal/advantage_abs_mean": 0.04734830111265183,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04734830111265183,
|
|
"signal/advantage_pre_scale_std": 0.09430029839277268,
|
|
"signal/advantage_std": 0.09430029839277268,
|
|
"signal/brier_reward/centered_abs_mean": 0.09894435703754426,
|
|
"signal/brier_reward/group_std_mean": 0.12911319881677627,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012368044629693032,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012368044629693032,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022022104263305663,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.027249596640467645,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002752763032913208,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002752763032913208,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014280044939368962,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0022502636536955835,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5561279471730813e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5561279471730813e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.1327954038977623,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17255037724971772,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002377037703990936,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002377037703990936,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11172600984573364,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14537906944751738,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019998955307528377,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019998955307528377,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07505722343921661,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09774749577045441,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013435242231935262,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013435242231935262,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.0550868459045887,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07034202218055725,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000986054469831288,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000986054469831288,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06590208411216736,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08453426957130432,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011796473059803247,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011796473059803247,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1327954038977623,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17255037724971772,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002377037703990936,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002377037703990936,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.002762398170307279,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003595150355249643,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00034529977128840985,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00034529977128840985,
|
|
"step": 285
|
|
},
|
|
{
|
|
"calibration/aurc": 0.18523557038518218,
|
|
"calibration/batch_distribution_entropy": 0.9283445164377936,
|
|
"calibration/buffer_distribution_entropy": 0.9310390835813201,
|
|
"calibration/confidence_entropy": 0.44306056288304363,
|
|
"calibration/coverage@0%": 0.08046875,
|
|
"calibration/coverage@1%": 0.08046875,
|
|
"calibration/coverage@10%": 0.40859375,
|
|
"calibration/coverage@15%": 0.51640625,
|
|
"calibration/coverage@20%": 0.59140625,
|
|
"calibration/coverage@25%": 0.66015625,
|
|
"calibration/coverage@30%": 0.7515625,
|
|
"calibration/coverage@5%": 0.21484375,
|
|
"calibration/ece": 0.15614833606636924,
|
|
"calibration/mean_confidence": 0.5529057898582223,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 464.0,
|
|
"completions/max_terminated_length": 464.0,
|
|
"completions/mean_length": 201.409375,
|
|
"completions/mean_terminated_length": 201.409375,
|
|
"completions/min_length": 93.6,
|
|
"completions/min_terminated_length": 93.6,
|
|
"epoch": 0.928,
|
|
"grad_norm": 0.0014505106955766678,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0,
|
|
"num_tokens": 982779906.0,
|
|
"reward": 1.0353971242904663,
|
|
"reward_std": 0.06244761645793915,
|
|
"rewards/accuracy_reward": 0.6052734375,
|
|
"rewards/brier_reward": 0.8278081059455872,
|
|
"rewards/confidence_uniqueness_reward": 0.9446945190429688,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0016608674312010407,
|
|
"rewards/frontier_coverage_1": 0.11747038513422012,
|
|
"rewards/frontier_coverage_10": 0.10292258858680725,
|
|
"rewards/frontier_coverage_15": 0.07811000794172288,
|
|
"rewards/frontier_coverage_20": 0.07272942364215851,
|
|
"rewards/frontier_coverage_25": 0.12077962756156921,
|
|
"rewards/frontier_coverage_5": 0.11747038513422012,
|
|
"rewards/frontier_ece_reward": 0.0025405031628906727,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0777587890625,
|
|
"signal/accuracy_reward/group_std_mean": 0.10788596123456955,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.675,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03887939453125,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03887939453125,
|
|
"signal/advantage_abs_mean": 0.0458635076880455,
|
|
"signal/advantage_pre_scale_abs_mean": 0.0458635076880455,
|
|
"signal/advantage_pre_scale_std": 0.09391386359930039,
|
|
"signal/advantage_std": 0.09391386359930039,
|
|
"signal/brier_reward/centered_abs_mean": 0.09663857668638229,
|
|
"signal/brier_reward/group_std_mean": 0.12685683369636536,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012079822085797786,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012079822085797786,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02300581932067871,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.028626967594027518,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028757274150848387,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028757274150848387,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015263804234564304,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0025658855913206933,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7322209280100652e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7322209280100652e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.13058804869651794,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.17300075590610503,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002337525924667716,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002337525924667716,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.10771108269691468,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.14299528300762177,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019280282547697424,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019280282547697424,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.0730916753411293,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.09659909605979919,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013083409518003463,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013083409518003463,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05504903867840767,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.07065875232219695,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009853777824901044,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009853777824901044,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06587158292531967,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08444809466600418,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001179101294837892,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001179101294837892,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.13058804869651794,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.17300075590610503,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002337525924667716,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002337525924667716,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.002657411713153124,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003528282977640629,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003321764641441405,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003321764641441405,
|
|
"step": 290
|
|
},
|
|
{
|
|
"calibration/aurc": 0.227605377702519,
|
|
"calibration/batch_distribution_entropy": 0.8678024925217093,
|
|
"calibration/buffer_distribution_entropy": 0.9332008553883091,
|
|
"calibration/confidence_entropy": 0.39428912795582655,
|
|
"calibration/coverage@0%": 0.16796875,
|
|
"calibration/coverage@1%": 0.17578125,
|
|
"calibration/coverage@10%": 0.3359375,
|
|
"calibration/coverage@15%": 0.415625,
|
|
"calibration/coverage@20%": 0.48515625,
|
|
"calibration/coverage@25%": 0.56015625,
|
|
"calibration/coverage@30%": 0.61484375,
|
|
"calibration/coverage@5%": 0.2453125,
|
|
"calibration/ece": 0.09917310585392751,
|
|
"calibration/mean_confidence": 0.4722144451024394,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0001953125,
|
|
"completions/max_length": 920.0,
|
|
"completions/max_terminated_length": 489.2,
|
|
"completions/mean_length": 202.98466796875,
|
|
"completions/mean_terminated_length": 202.72425537109376,
|
|
"completions/min_length": 100.2,
|
|
"completions/min_terminated_length": 100.2,
|
|
"epoch": 0.944,
|
|
"grad_norm": 0.0016979072242975235,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 999833893.0,
|
|
"reward": 1.0405084133148192,
|
|
"reward_std": 0.07115750387310982,
|
|
"rewards/accuracy_reward": 0.616015625,
|
|
"rewards/brier_reward": 0.8274973273277283,
|
|
"rewards/confidence_uniqueness_reward": 0.9424091815948487,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.001480784686282277,
|
|
"rewards/frontier_coverage_1": 0.11875949800014496,
|
|
"rewards/frontier_coverage_10": 0.10412099286913871,
|
|
"rewards/frontier_coverage_15": 0.08062837272882462,
|
|
"rewards/frontier_coverage_20": 0.0747826412320137,
|
|
"rewards/frontier_coverage_25": 0.12094295620918274,
|
|
"rewards/frontier_coverage_5": 0.11873992830514908,
|
|
"rewards/frontier_ece_reward": 0.0025975925382226706,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.10352783203125,
|
|
"signal/accuracy_reward/group_std_mean": 0.1342850521206856,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051763916015625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051763916015625,
|
|
"signal/advantage_abs_mean": 0.054445850849151614,
|
|
"signal/advantage_pre_scale_abs_mean": 0.054445850849151614,
|
|
"signal/advantage_pre_scale_std": 0.10500096529722214,
|
|
"signal/advantage_std": 0.10500096529722214,
|
|
"signal/brier_reward/centered_abs_mean": 0.1040783628821373,
|
|
"signal/brier_reward/group_std_mean": 0.13488138020038604,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013009795360267163,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.013009795360267163,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02404037192463875,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03030591309070587,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030050464905798436,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030050464905798436,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001270879921503365,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0021014282014220954,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2748749870515894e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2748749870515894e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.14869227409362792,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.19285742044448853,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026615916285663843,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026615916285663843,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.11862881183624267,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.15488066375255585,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021234555868431928,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021234555868431928,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.07927502691745758,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.10365805774927139,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014190229121595621,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014190229121595621,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05630268827080727,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.0722155287861824,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010078180697746576,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010078180697746576,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06522702798247337,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08425245583057403,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00116756372153759,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00116756372153759,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.14861850142478944,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.19274679124355315,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002660271106287837,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002660271106287837,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.002932385681197047,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0038601367734372614,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003665482101496309,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003665482101496309,
|
|
"step": 295
|
|
},
|
|
{
|
|
"calibration/aurc": 0.22753061737104918,
|
|
"calibration/batch_distribution_entropy": 0.8395263422525059,
|
|
"calibration/buffer_distribution_entropy": 0.9327987097688348,
|
|
"calibration/confidence_entropy": 0.3778946036635543,
|
|
"calibration/coverage@0%": 0.21328125,
|
|
"calibration/coverage@1%": 0.215625,
|
|
"calibration/coverage@10%": 0.4578125,
|
|
"calibration/coverage@15%": 0.50625,
|
|
"calibration/coverage@20%": 0.57734375,
|
|
"calibration/coverage@25%": 0.63671875,
|
|
"calibration/coverage@30%": 0.690625,
|
|
"calibration/coverage@5%": 0.4203125,
|
|
"calibration/ece": 0.21041953124999999,
|
|
"calibration/mean_confidence": 0.63975046875,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 9.765625e-05,
|
|
"completions/max_length": 683.0,
|
|
"completions/max_terminated_length": 455.4,
|
|
"completions/mean_length": 202.90927734375,
|
|
"completions/mean_terminated_length": 202.77940368652344,
|
|
"completions/min_length": 99.6,
|
|
"completions/min_terminated_length": 99.6,
|
|
"epoch": 0.96,
|
|
"grad_norm": 0.0016687435563653708,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0002,
|
|
"num_tokens": 1016852004.0,
|
|
"reward": 1.0291436433792114,
|
|
"reward_std": 0.0583199568092823,
|
|
"rewards/accuracy_reward": 0.58271484375,
|
|
"rewards/brier_reward": 0.8477208733558654,
|
|
"rewards/confidence_uniqueness_reward": 0.9432453274726867,
|
|
"rewards/format_reward": 0.9998046875,
|
|
"rewards/frontier_aurc_reward": -0.0021191579522565006,
|
|
"rewards/frontier_coverage_1": 0.1549065351486206,
|
|
"rewards/frontier_coverage_10": 0.12819213569164276,
|
|
"rewards/frontier_coverage_15": 0.09457356631755828,
|
|
"rewards/frontier_coverage_20": 0.08866416066884994,
|
|
"rewards/frontier_coverage_25": 0.14134843051433563,
|
|
"rewards/frontier_coverage_5": 0.15478427112102508,
|
|
"rewards/frontier_ece_reward": 0.0032231774181127547,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.072796630859375,
|
|
"signal/accuracy_reward/group_std_mean": 0.10131096243858337,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363983154296875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0363983154296875,
|
|
"signal/advantage_abs_mean": 0.042303390055894854,
|
|
"signal/advantage_pre_scale_abs_mean": 0.042303390055894854,
|
|
"signal/advantage_pre_scale_std": 0.0915198415517807,
|
|
"signal/advantage_std": 0.0915198415517807,
|
|
"signal/brier_reward/centered_abs_mean": 0.08765042722225189,
|
|
"signal/brier_reward/group_std_mean": 0.11655679643154145,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010956303402781486,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.010956303402781486,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024246321246027946,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.030507474020123482,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030307901557534932,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030307901557534932,
|
|
"signal/format_reward/centered_abs_mean": 0.00037841796875,
|
|
"signal/format_reward/group_std_mean": 0.0011048543266952038,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017094084527343512,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0029599607922136785,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.059841037611477e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.059841037611477e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12009998559951782,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15766243636608124,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002149789733812213,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002149789733812213,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09564173370599746,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.12548429369926453,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017119870288297534,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017119870288297534,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06683021634817124,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08725652545690536,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011962608667090535,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011962608667090535,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.052955988049507144,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06789801940321923,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009479121654294431,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009479121654294431,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06407563537359237,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08428025245666504,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001146953902207315,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001146953902207315,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1199414610862732,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15745915472507477,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002146952087059617,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002146952087059617,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0024932647589594125,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003281328594312072,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00031165809486992656,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00031165809486992656,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"eval_calibration/aurc": 0.40288964253104903,
|
|
"eval_calibration/batch_distribution_entropy": 0.9055634924361762,
|
|
"eval_calibration/buffer_distribution_entropy": 0.9315158471490221,
|
|
"eval_calibration/confidence_entropy": 0.4507296505092381,
|
|
"eval_calibration/coverage@0%": 0.0625,
|
|
"eval_calibration/coverage@1%": 0.0625,
|
|
"eval_calibration/coverage@10%": 0.0625,
|
|
"eval_calibration/coverage@15%": 0.15625,
|
|
"eval_calibration/coverage@20%": 0.15625,
|
|
"eval_calibration/coverage@25%": 0.28125,
|
|
"eval_calibration/coverage@30%": 0.3125,
|
|
"eval_calibration/coverage@5%": 0.0625,
|
|
"eval_calibration/ece": 0.2239203125,
|
|
"eval_calibration/mean_confidence": 0.5807953125,
|
|
"eval_completions/clipped_ratio": 0.0,
|
|
"eval_completions/max_length": 390.0,
|
|
"eval_completions/max_terminated_length": 390.0,
|
|
"eval_completions/mean_length": 201.66341400146484,
|
|
"eval_completions/mean_terminated_length": 201.66341400146484,
|
|
"eval_completions/min_length": 103.0,
|
|
"eval_completions/min_terminated_length": 103.0,
|
|
"eval_loss": 0.0,
|
|
"eval_num_tokens": 1016852004.0,
|
|
"eval_reward": 0.9454332888126373,
|
|
"eval_reward_std": 0.25646254420280457,
|
|
"eval_rewards/accuracy_reward": 0.44140625,
|
|
"eval_rewards/brier_reward": 0.7841920852661133,
|
|
"eval_rewards/confidence_uniqueness_reward": 0.8974609375,
|
|
"eval_rewards/format_reward": 1.0,
|
|
"eval_rewards/frontier_aurc_reward": -0.005405109841376543,
|
|
"eval_rewards/frontier_coverage_1": 0.19863545149564743,
|
|
"eval_rewards/frontier_coverage_10": 0.15553244948387146,
|
|
"eval_rewards/frontier_coverage_15": 0.10400541499257088,
|
|
"eval_rewards/frontier_coverage_20": 0.06785453855991364,
|
|
"eval_rewards/frontier_coverage_25": 0.06973126530647278,
|
|
"eval_rewards/frontier_coverage_5": 0.19848963618278503,
|
|
"eval_rewards/frontier_ece_reward": 0.0032259345753118396,
|
|
"eval_runtime": 10.2094,
|
|
"eval_samples_per_second": 48.974,
|
|
"eval_signal/accuracy_reward/centered_abs_mean": 0.4755859375,
|
|
"eval_signal/accuracy_reward/group_std_mean": 0.49512895941734314,
|
|
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23779296875,
|
|
"eval_signal/accuracy_reward/weight": 0.5,
|
|
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23779296875,
|
|
"eval_signal/advantage_abs_mean": 0.24011892080307007,
|
|
"eval_signal/advantage_pre_scale_abs_mean": 0.24011892080307007,
|
|
"eval_signal/advantage_pre_scale_std": 0.2530638575553894,
|
|
"eval_signal/advantage_std": 0.2530638575553894,
|
|
"eval_signal/brier_reward/centered_abs_mean": 0.2406034916639328,
|
|
"eval_signal/brier_reward/group_std_mean": 0.29827988147735596,
|
|
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0300754364579916,
|
|
"eval_signal/brier_reward/weight": 0.125,
|
|
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0300754364579916,
|
|
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0444183349609375,
|
|
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051535068079829216,
|
|
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055522918701171875,
|
|
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055522918701171875,
|
|
"eval_signal/format_reward/centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/group_std_mean": 0.0,
|
|
"eval_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/format_reward/weight": 0.5,
|
|
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007637398317456245,
|
|
"eval_signal/frontier_aurc_reward/group_std_mean": 0.01684427261352539,
|
|
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013670942280441523,
|
|
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013670942280441523,
|
|
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.31974154710769653,
|
|
"eval_signal/frontier_coverage_1/group_std_mean": 0.3967055380344391,
|
|
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005723373498767614,
|
|
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005723373498767614,
|
|
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2480403035879135,
|
|
"eval_signal/frontier_coverage_10/group_std_mean": 0.31127846240997314,
|
|
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004439921351149678,
|
|
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004439921351149678,
|
|
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.15623818337917328,
|
|
"eval_signal/frontier_coverage_15/group_std_mean": 0.20206287503242493,
|
|
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027966632042080164,
|
|
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027966632042080164,
|
|
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10376439616084099,
|
|
"eval_signal/frontier_coverage_20/group_std_mean": 0.13048581779003143,
|
|
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018573826528154314,
|
|
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018573826528154314,
|
|
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.19033470749855042,
|
|
"eval_signal/frontier_coverage_25/group_std_mean": 0.24326416850090027,
|
|
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034069910179823637,
|
|
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034069910179823637,
|
|
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.31926435232162476,
|
|
"eval_signal/frontier_coverage_5/group_std_mean": 0.3961242437362671,
|
|
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0057148318737745285,
|
|
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0057148318737745285,
|
|
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.004807816818356514,
|
|
"eval_signal/frontier_ece_reward/group_std_mean": 0.00625448627397418,
|
|
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006009771022945642,
|
|
"eval_signal/frontier_ece_reward/weight": 0.125,
|
|
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006009771022945642,
|
|
"eval_steps_per_second": 0.196,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"step": 300,
|
|
"train_probe_calibration/aurc": 0.11477367801974697,
|
|
"train_probe_calibration/batch_distribution_entropy": 0.8259865898626315,
|
|
"train_probe_calibration/buffer_distribution_entropy": 0.9317202950173966,
|
|
"train_probe_calibration/confidence_entropy": 0.39752484027856694,
|
|
"train_probe_calibration/coverage@0%": 0.140625,
|
|
"train_probe_calibration/coverage@1%": 0.140625,
|
|
"train_probe_calibration/coverage@10%": 0.734375,
|
|
"train_probe_calibration/coverage@15%": 0.8125,
|
|
"train_probe_calibration/coverage@20%": 0.875,
|
|
"train_probe_calibration/coverage@25%": 0.9375,
|
|
"train_probe_calibration/coverage@30%": 0.96875,
|
|
"train_probe_calibration/coverage@5%": 0.421875,
|
|
"train_probe_calibration/ece": 0.16384375,
|
|
"train_probe_calibration/mean_confidence": 0.65728125,
|
|
"train_probe_completions/clipped_ratio": 0.0,
|
|
"train_probe_completions/max_length": 349.0,
|
|
"train_probe_completions/max_terminated_length": 349.0,
|
|
"train_probe_completions/mean_length": 201.03668975830078,
|
|
"train_probe_completions/mean_terminated_length": 201.03668975830078,
|
|
"train_probe_completions/min_length": 111.5,
|
|
"train_probe_completions/min_terminated_length": 111.5,
|
|
"train_probe_loss": 0.0,
|
|
"train_probe_num_tokens": 1016852004.0,
|
|
"train_probe_reward": 1.0789863467216492,
|
|
"train_probe_reward_std": 0.22811973094940186,
|
|
"train_probe_rewards/accuracy_reward": 0.693359375,
|
|
"train_probe_rewards/brier_reward": 0.8687321543693542,
|
|
"train_probe_rewards/confidence_uniqueness_reward": 0.900390625,
|
|
"train_probe_rewards/format_reward": 1.0,
|
|
"train_probe_rewards/frontier_aurc_reward": -0.001137724844738841,
|
|
"train_probe_rewards/frontier_coverage_1": 0.09572022780776024,
|
|
"train_probe_rewards/frontier_coverage_10": 0.0828697718679905,
|
|
"train_probe_rewards/frontier_coverage_15": 0.07084467262029648,
|
|
"train_probe_rewards/frontier_coverage_20": 0.08797503262758255,
|
|
"train_probe_rewards/frontier_coverage_25": 0.17389176040887833,
|
|
"train_probe_rewards/frontier_coverage_5": 0.09541856124997139,
|
|
"train_probe_rewards/frontier_ece_reward": 0.0026104446733370423,
|
|
"train_probe_runtime": 9.6991,
|
|
"train_probe_samples_per_second": 51.551,
|
|
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4151611328125,
|
|
"train_probe_signal/accuracy_reward/group_std_mean": 0.46192415058612823,
|
|
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20758056640625,
|
|
"train_probe_signal/accuracy_reward/weight": 0.5,
|
|
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20758056640625,
|
|
"train_probe_signal/advantage_abs_mean": 0.20193417370319366,
|
|
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20193417370319366,
|
|
"train_probe_signal/advantage_pre_scale_std": 0.2256685495376587,
|
|
"train_probe_signal/advantage_std": 0.2256685495376587,
|
|
"train_probe_signal/brier_reward/centered_abs_mean": 0.15471985936164856,
|
|
"train_probe_signal/brier_reward/group_std_mean": 0.21638543158769608,
|
|
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01933998242020607,
|
|
"train_probe_signal/brier_reward/weight": 0.125,
|
|
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01933998242020607,
|
|
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.039031982421875,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.04604136198759079,
|
|
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004878997802734375,
|
|
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004878997802734375,
|
|
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_std_mean": 0.0,
|
|
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
|
|
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/format_reward/weight": 0.5,
|
|
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0020724779460579157,
|
|
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.003993918187916279,
|
|
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.709735210577492e-05,
|
|
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.709735210577492e-05,
|
|
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2661040276288986,
|
|
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.37349459528923035,
|
|
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004763261880725622,
|
|
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004763261880725622,
|
|
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.20318175852298737,
|
|
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.28969065845012665,
|
|
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036369531881064177,
|
|
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036369531881064177,
|
|
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.12607631087303162,
|
|
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.18381474167108536,
|
|
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022567659616470337,
|
|
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022567659616470337,
|
|
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.08980197459459305,
|
|
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.11331581324338913,
|
|
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016074551967903972,
|
|
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016074551967903972,
|
|
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.17201132327318192,
|
|
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.2077884078025818,
|
|
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030790024902671576,
|
|
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030790024902671576,
|
|
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.26485244929790497,
|
|
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3718564957380295,
|
|
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004740858683362603,
|
|
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004740858683362603,
|
|
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.004174819332547486,
|
|
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.005938299465924501,
|
|
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
|
|
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005218524165684357,
|
|
"train_probe_signal/frontier_ece_reward/weight": 0.125,
|
|
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005218524165684357,
|
|
"train_probe_steps_per_second": 0.206
|
|
},
|
|
{
|
|
"calibration/aurc": 0.19888012009032868,
|
|
"calibration/batch_distribution_entropy": 0.8751961847929361,
|
|
"calibration/buffer_distribution_entropy": 0.9316692924834962,
|
|
"calibration/confidence_entropy": 0.4188681558773877,
|
|
"calibration/coverage@0%": 0.0359375,
|
|
"calibration/coverage@1%": 0.0359375,
|
|
"calibration/coverage@10%": 0.43515625,
|
|
"calibration/coverage@15%": 0.4859375,
|
|
"calibration/coverage@20%": 0.53671875,
|
|
"calibration/coverage@25%": 0.61484375,
|
|
"calibration/coverage@30%": 0.72421875,
|
|
"calibration/coverage@5%": 0.18515625,
|
|
"calibration/ece": 0.14540309140625002,
|
|
"calibration/mean_confidence": 0.64485612734375,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.00029296875,
|
|
"completions/max_length": 931.6,
|
|
"completions/max_terminated_length": 540.2,
|
|
"completions/mean_length": 205.41650390625,
|
|
"completions/mean_terminated_length": 205.0261260986328,
|
|
"completions/min_length": 101.4,
|
|
"completions/min_terminated_length": 101.4,
|
|
"epoch": 0.976,
|
|
"grad_norm": 0.0016158220823854208,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0008,
|
|
"num_tokens": 1033816589.0,
|
|
"reward": 1.0453666210174561,
|
|
"reward_std": 0.06708120256662368,
|
|
"rewards/accuracy_reward": 0.62216796875,
|
|
"rewards/brier_reward": 0.8398854255676269,
|
|
"rewards/confidence_uniqueness_reward": 0.9433197736740112,
|
|
"rewards/format_reward": 0.99970703125,
|
|
"rewards/frontier_aurc_reward": -0.0018908762140199542,
|
|
"rewards/frontier_coverage_1": 0.11496728807687759,
|
|
"rewards/frontier_coverage_10": 0.09931781068444252,
|
|
"rewards/frontier_coverage_15": 0.07630908414721489,
|
|
"rewards/frontier_coverage_20": 0.07998319193720818,
|
|
"rewards/frontier_coverage_25": 0.1425451785326004,
|
|
"rewards/frontier_coverage_5": 0.11488909721374511,
|
|
"rewards/frontier_ece_reward": 0.0025672421557828783,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.084222412109375,
|
|
"signal/accuracy_reward/group_std_mean": 0.11655709967017173,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421112060546875,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0421112060546875,
|
|
"signal/advantage_abs_mean": 0.048955275863409045,
|
|
"signal/advantage_pre_scale_abs_mean": 0.048955275863409045,
|
|
"signal/advantage_pre_scale_std": 0.10104106813669204,
|
|
"signal/advantage_std": 0.10104106813669204,
|
|
"signal/brier_reward/centered_abs_mean": 0.09183044731616974,
|
|
"signal/brier_reward/group_std_mean": 0.12157261669635773,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011478805914521217,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011478805914521217,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421579249203205,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03093937486410141,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030269740615040063,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030269740615040063,
|
|
"signal/format_reward/centered_abs_mean": 0.000555419921875,
|
|
"signal/format_reward/group_std_mean": 0.0013209730386734009,
|
|
"signal/format_reward/group_zero_std_frac": 0.99375,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015749115496873855,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0027885420713573694,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.819091714627575e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.819091714627575e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12192182391881942,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.15978844761848449,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002182400575838983,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002182400575838983,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09427153617143631,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.12419438064098358,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016874604858458041,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016874604858458041,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06523038446903229,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.08563594371080399,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011676238849759103,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011676238849759103,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05165816843509674,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06644249334931374,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009246811503544449,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009246811503544449,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06867350712418556,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.09026172012090683,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012292557861655951,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012292557861655951,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.1216941773891449,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.15950067937374116,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021783256670460105,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021783256670460105,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.00236211777664721,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003097822656854987,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00029526472208090125,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00029526472208090125,
|
|
"step": 305
|
|
},
|
|
{
|
|
"calibration/aurc": 0.27617015723730154,
|
|
"calibration/batch_distribution_entropy": 0.8719944630571315,
|
|
"calibration/buffer_distribution_entropy": 0.9307879294053564,
|
|
"calibration/confidence_entropy": 0.3881166828523776,
|
|
"calibration/coverage@0%": 0.15625,
|
|
"calibration/coverage@1%": 0.18515625,
|
|
"calibration/coverage@10%": 0.31796875,
|
|
"calibration/coverage@15%": 0.36171875,
|
|
"calibration/coverage@20%": 0.4203125,
|
|
"calibration/coverage@25%": 0.48046875,
|
|
"calibration/coverage@30%": 0.53515625,
|
|
"calibration/coverage@5%": 0.27578125,
|
|
"calibration/ece": 0.1480644396551724,
|
|
"calibration/mean_confidence": 0.4999269396551724,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 497.4,
|
|
"completions/max_terminated_length": 497.4,
|
|
"completions/mean_length": 200.55478515625,
|
|
"completions/mean_terminated_length": 200.55478515625,
|
|
"completions/min_length": 91.2,
|
|
"completions/min_terminated_length": 91.2,
|
|
"epoch": 0.992,
|
|
"grad_norm": 0.0018319895025342703,
|
|
"learning_rate": 1e-06,
|
|
"loss": 0.0001,
|
|
"num_tokens": 1050998750.0,
|
|
"reward": 1.0178974866867065,
|
|
"reward_std": 0.062009623646736144,
|
|
"rewards/accuracy_reward": 0.571875,
|
|
"rewards/brier_reward": 0.818060839176178,
|
|
"rewards/confidence_uniqueness_reward": 0.9377853393554687,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.0024078495102003218,
|
|
"rewards/frontier_coverage_1": 0.13882496058940888,
|
|
"rewards/frontier_coverage_10": 0.11529144048690795,
|
|
"rewards/frontier_coverage_15": 0.08634113371372223,
|
|
"rewards/frontier_coverage_20": 0.08047932088375091,
|
|
"rewards/frontier_coverage_25": 0.1226568266749382,
|
|
"rewards/frontier_coverage_5": 0.13870886862277984,
|
|
"rewards/frontier_ece_reward": 0.0024728897726163266,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.08270263671875,
|
|
"signal/accuracy_reward/group_std_mean": 0.10843254029750823,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041351318359375,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.041351318359375,
|
|
"signal/advantage_abs_mean": 0.04691413417458534,
|
|
"signal/advantage_pre_scale_abs_mean": 0.04691413417458534,
|
|
"signal/advantage_pre_scale_std": 0.09592601060867309,
|
|
"signal/advantage_std": 0.09592601060867309,
|
|
"signal/brier_reward/centered_abs_mean": 0.0934365376830101,
|
|
"signal/brier_reward/group_std_mean": 0.12053980976343155,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011679567210376263,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.011679567210376263,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025814294815063477,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.03233279511332512,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032267868518829346,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032267868518829346,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.001967202941887081,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.0030709158163517714,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.521293183439411e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.521293183439411e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12599806636571884,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16160787940025328,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022553652757778763,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022553652757778763,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09844744727015495,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.12621570527553558,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001762209297157824,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001762209297157824,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06843779757618904,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0877251997590065,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012250364990904928,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012250364990904928,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05275077372789383,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06691490858793259,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009442388545721769,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009442388545721769,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06476361751556396,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08386294692754745,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011592687340453267,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011592687340453267,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12582006603479384,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16137229949235915,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022521790815517306,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022521790815517306,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.0023749925196170805,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.003116936841979623,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00029687406495213506,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00029687406495213506,
|
|
"step": 310
|
|
},
|
|
{
|
|
"calibration/aurc": 0.07394829778523786,
|
|
"calibration/batch_distribution_entropy": 0.6858898086044589,
|
|
"calibration/buffer_distribution_entropy": 0.9311619746607875,
|
|
"calibration/confidence_entropy": 0.34007166230520547,
|
|
"calibration/coverage@0%": 0.10546875,
|
|
"calibration/coverage@1%": 0.10546875,
|
|
"calibration/coverage@10%": 0.802734375,
|
|
"calibration/coverage@15%": 0.943359375,
|
|
"calibration/coverage@20%": 0.970703125,
|
|
"calibration/coverage@25%": 1.0,
|
|
"calibration/coverage@30%": 1.0,
|
|
"calibration/coverage@5%": 0.5,
|
|
"calibration/ece": 0.136876953125,
|
|
"calibration/mean_confidence": 0.7878769531250001,
|
|
"clip_ratio/high_max": 0.0,
|
|
"clip_ratio/high_mean": 0.0,
|
|
"clip_ratio/low_mean": 0.0,
|
|
"clip_ratio/low_min": 0.0,
|
|
"clip_ratio/region_mean": 0.0,
|
|
"completions/clipped_ratio": 0.0,
|
|
"completions/max_length": 386.5,
|
|
"completions/max_terminated_length": 386.5,
|
|
"completions/mean_length": 199.02162170410156,
|
|
"completions/mean_terminated_length": 199.02162170410156,
|
|
"completions/min_length": 96.0,
|
|
"completions/min_terminated_length": 96.0,
|
|
"epoch": 0.9984,
|
|
"num_tokens": 1057815101.0,
|
|
"reward": 1.050271451473236,
|
|
"reward_std": 0.06441943719983101,
|
|
"rewards/accuracy_reward": 0.645263671875,
|
|
"rewards/brier_reward": 0.8100776672363281,
|
|
"rewards/confidence_uniqueness_reward": 0.9436569213867188,
|
|
"rewards/format_reward": 1.0,
|
|
"rewards/frontier_aurc_reward": -0.001602545497007668,
|
|
"rewards/frontier_coverage_1": 0.07218670099973679,
|
|
"rewards/frontier_coverage_10": 0.0615706741809845,
|
|
"rewards/frontier_coverage_15": 0.05613754317164421,
|
|
"rewards/frontier_coverage_20": 0.06846107542514801,
|
|
"rewards/frontier_coverage_25": 0.12920933216810226,
|
|
"rewards/frontier_coverage_5": 0.07211882993578911,
|
|
"rewards/frontier_ece_reward": 0.0017852028249762952,
|
|
"signal/accuracy_reward/centered_abs_mean": 0.0796661376953125,
|
|
"signal/accuracy_reward/group_std_mean": 0.11285967007279396,
|
|
"signal/accuracy_reward/group_zero_std_frac": 0.6484375,
|
|
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03983306884765625,
|
|
"signal/accuracy_reward/weight": 0.5,
|
|
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03983306884765625,
|
|
"signal/advantage_abs_mean": 0.047435952350497246,
|
|
"signal/advantage_pre_scale_abs_mean": 0.047435952350497246,
|
|
"signal/advantage_pre_scale_std": 0.09802256524562836,
|
|
"signal/advantage_std": 0.09802256524562836,
|
|
"signal/brier_reward/centered_abs_mean": 0.10139483213424683,
|
|
"signal/brier_reward/group_std_mean": 0.12895793095231056,
|
|
"signal/brier_reward/group_zero_std_frac": 0.0,
|
|
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012674354016780853,
|
|
"signal/brier_reward/weight": 0.125,
|
|
"signal/brier_reward/weighted_centered_abs_mean": 0.012674354016780853,
|
|
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023676156997680664,
|
|
"signal/confidence_uniqueness_reward/group_std_mean": 0.029014757834374905,
|
|
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
|
|
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002959519624710083,
|
|
"signal/confidence_uniqueness_reward/weight": 0.125,
|
|
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002959519624710083,
|
|
"signal/format_reward/centered_abs_mean": 0.0,
|
|
"signal/format_reward/group_std_mean": 0.0,
|
|
"signal/format_reward/group_zero_std_frac": 1.0,
|
|
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
|
|
"signal/format_reward/weight": 0.5,
|
|
"signal/format_reward/weighted_centered_abs_mean": 0.0,
|
|
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015349971363320947,
|
|
"signal/frontier_aurc_reward/group_std_mean": 0.002599976258352399,
|
|
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
|
|
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.747644975897856e-05,
|
|
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
|
|
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.747644975897856e-05,
|
|
"signal/frontier_coverage_1/centered_abs_mean": 0.12439806759357452,
|
|
"signal/frontier_coverage_1/group_std_mean": 0.16710513830184937,
|
|
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002226725220680237,
|
|
"signal/frontier_coverage_1/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002226725220680237,
|
|
"signal/frontier_coverage_10/centered_abs_mean": 0.09704583883285522,
|
|
"signal/frontier_coverage_10/group_std_mean": 0.13068146258592606,
|
|
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017371204448863864,
|
|
"signal/frontier_coverage_10/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017371204448863864,
|
|
"signal/frontier_coverage_15/centered_abs_mean": 0.06808548793196678,
|
|
"signal/frontier_coverage_15/group_std_mean": 0.0913914144039154,
|
|
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001218730176333338,
|
|
"signal/frontier_coverage_15/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001218730176333338,
|
|
"signal/frontier_coverage_20/centered_abs_mean": 0.05189245194196701,
|
|
"signal/frontier_coverage_20/group_std_mean": 0.06728483736515045,
|
|
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009288748260587454,
|
|
"signal/frontier_coverage_20/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009288748260587454,
|
|
"signal/frontier_coverage_25/centered_abs_mean": 0.06688933074474335,
|
|
"signal/frontier_coverage_25/group_std_mean": 0.08471940457820892,
|
|
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011973190703429282,
|
|
"signal/frontier_coverage_25/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011973190703429282,
|
|
"signal/frontier_coverage_5/centered_abs_mean": 0.12402944266796112,
|
|
"signal/frontier_coverage_5/group_std_mean": 0.16661176830530167,
|
|
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
|
|
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002220126916654408,
|
|
"signal/frontier_coverage_5/weight": 0.017899999395012856,
|
|
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002220126916654408,
|
|
"signal/frontier_ece_reward/centered_abs_mean": 0.002541982219554484,
|
|
"signal/frontier_ece_reward/group_std_mean": 0.0034003107575699687,
|
|
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
|
|
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003177477774443105,
|
|
"signal/frontier_ece_reward/weight": 0.125,
|
|
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003177477774443105,
|
|
"step": 312,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.00456765069126656,
|
|
"train_runtime": 31094.48,
|
|
"train_samples_per_second": 0.643,
|
|
"train_steps_per_second": 0.01
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 312,
|
|
"num_input_tokens_seen": 1057815101,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 60,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|