Files
RLCR-v4-ks-uniqueness-hotpo…/trainer_state.json
ModelHub XC ac06da513f 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-hotpot-aliases-qwen35-balanced-fullnode-ga32
Source: Original Platform
2026-04-24 04:37:14 +08:00

9284 lines
572 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.574161369417126,
"calibration/batch_distribution_entropy": 0.6217632380850391,
"calibration/confidence_entropy": 0.3449140549111297,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.07282051282051281,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.47335296160366747,
"calibration/mean_confidence": 0.8051669529651726,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0361328125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1501.2,
"completions/mean_length": 268.3080078125,
"completions/mean_terminated_length": 220.78201293945312,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.11463230848312378,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0912,
"num_tokens": 17591506.0,
"reward": 0.6744200468063355,
"reward_std": 0.49649240970611574,
"rewards/accuracy_reward": 0.26630859375,
"rewards/brier_reward": 0.4115479052066803,
"rewards/confidence_uniqueness_reward": 0.4812490105628967,
"rewards/format_reward": 0.68798828125,
"rewards/frontier_aurc_reward": 0.3422773241996765,
"rewards/frontier_coverage_1": 0.3422773241996765,
"rewards/frontier_coverage_10": 0.3422773241996765,
"rewards/frontier_coverage_15": 0.3422773241996765,
"rewards/frontier_coverage_20": 0.3422773241996765,
"rewards/frontier_coverage_25": 0.3422773241996765,
"rewards/frontier_coverage_5": 0.3422773241996765,
"rewards/frontier_ece_reward": 0.3422773241996765,
"signal/accuracy_reward/centered_abs_mean": 0.274066162109375,
"signal/accuracy_reward/group_std_mean": 0.31360672116279603,
"signal/accuracy_reward/group_zero_std_frac": 0.26875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1370330810546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1370330810546875,
"signal/advantage_abs_mean": 0.42605471014976504,
"signal/advantage_pre_scale_abs_mean": 0.42605471014976504,
"signal/advantage_pre_scale_std": 0.5046224594116211,
"signal/advantage_std": 0.5046224594116211,
"signal/brier_reward/centered_abs_mean": 0.33465067148208616,
"signal/brier_reward/group_std_mean": 0.3789239704608917,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04183133393526077,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.04183133393526077,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2928457796573639,
"signal/confidence_uniqueness_reward/group_std_mean": 0.34470821022987364,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03660572245717049,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03660572245717049,
"signal/format_reward/centered_abs_mean": 0.394317626953125,
"signal/format_reward/group_std_mean": 0.4479940414428711,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1971588134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1971588134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.31442518830299376,
"signal/frontier_aurc_reward/group_std_mean": 0.3627101004123688,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_1/centered_abs_mean": 0.31442518830299376,
"signal/frontier_coverage_1/group_std_mean": 0.3627101004123688,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_10/centered_abs_mean": 0.31442518830299376,
"signal/frontier_coverage_10/group_std_mean": 0.3627101004123688,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_15/centered_abs_mean": 0.31442518830299376,
"signal/frontier_coverage_15/group_std_mean": 0.3627101004123688,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_20/centered_abs_mean": 0.31442518830299376,
"signal/frontier_coverage_20/group_std_mean": 0.3627101004123688,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_25/centered_abs_mean": 0.31442518830299376,
"signal/frontier_coverage_25/group_std_mean": 0.3627101004123688,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_5/centered_abs_mean": 0.31442518830299376,
"signal/frontier_coverage_5/group_std_mean": 0.3627101004123688,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005628210585564375,
"signal/frontier_ece_reward/centered_abs_mean": 0.31442518830299376,
"signal/frontier_ece_reward/group_std_mean": 0.3627101004123688,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03930314853787422,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03930314853787422,
"step": 5
},
{
"calibration/aurc": 0.6252603742483307,
"calibration/batch_distribution_entropy": 0.6553918903963509,
"calibration/confidence_entropy": 0.3561837908236652,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4639029824341743,
"calibration/mean_confidence": 0.7835663529333339,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03310546875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1525.8,
"completions/mean_length": 253.1953125,
"completions/mean_terminated_length": 209.34018249511718,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.038865186274051666,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0913,
"num_tokens": 35284578.0,
"reward": 0.7065129756927491,
"reward_std": 0.4615809082984924,
"rewards/accuracy_reward": 0.25361328125,
"rewards/brier_reward": 0.4207825243473053,
"rewards/confidence_uniqueness_reward": 0.5312160611152649,
"rewards/format_reward": 0.75029296875,
"rewards/frontier_aurc_reward": 0.3418300449848175,
"rewards/frontier_coverage_1": 0.3418300449848175,
"rewards/frontier_coverage_10": 0.3418300449848175,
"rewards/frontier_coverage_15": 0.3418300449848175,
"rewards/frontier_coverage_20": 0.3418300449848175,
"rewards/frontier_coverage_25": 0.3418300449848175,
"rewards/frontier_coverage_5": 0.3418300449848175,
"rewards/frontier_ece_reward": 0.3418300449848175,
"signal/accuracy_reward/centered_abs_mean": 0.252484130859375,
"signal/accuracy_reward/group_std_mean": 0.3011133372783661,
"signal/accuracy_reward/group_zero_std_frac": 0.259375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1262420654296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1262420654296875,
"signal/advantage_abs_mean": 0.37834325432777405,
"signal/advantage_pre_scale_abs_mean": 0.37834325432777405,
"signal/advantage_pre_scale_std": 0.470405113697052,
"signal/advantage_std": 0.470405113697052,
"signal/brier_reward/centered_abs_mean": 0.3180624425411224,
"signal/brier_reward/group_std_mean": 0.36653814315795896,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0397578053176403,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0397578053176403,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.26791125535964966,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3284755825996399,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03348890691995621,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03348890691995621,
"signal/format_reward/centered_abs_mean": 0.348736572265625,
"signal/format_reward/group_std_mean": 0.4197552680969238,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1743682861328125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1743682861328125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2994271457195282,
"signal/frontier_aurc_reward/group_std_mean": 0.35230074524879457,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_1/centered_abs_mean": 0.2994271457195282,
"signal/frontier_coverage_1/group_std_mean": 0.35230074524879457,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_10/centered_abs_mean": 0.2994271457195282,
"signal/frontier_coverage_10/group_std_mean": 0.35230074524879457,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_15/centered_abs_mean": 0.2994271457195282,
"signal/frontier_coverage_15/group_std_mean": 0.35230074524879457,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_20/centered_abs_mean": 0.2994271457195282,
"signal/frontier_coverage_20/group_std_mean": 0.35230074524879457,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_25/centered_abs_mean": 0.2994271457195282,
"signal/frontier_coverage_25/group_std_mean": 0.35230074524879457,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_5/centered_abs_mean": 0.2994271457195282,
"signal/frontier_coverage_5/group_std_mean": 0.35230074524879457,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00535974558442831,
"signal/frontier_ece_reward/centered_abs_mean": 0.2994271457195282,
"signal/frontier_ece_reward/group_std_mean": 0.35230074524879457,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03742839321494103,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03742839321494103,
"step": 10
},
{
"calibration/aurc": 0.5063660563094874,
"calibration/batch_distribution_entropy": 0.6456137890333957,
"calibration/confidence_entropy": 0.3523480270306522,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.40836266399469745,
"calibration/mean_confidence": 0.803510317425796,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0111328125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1402.4,
"completions/mean_length": 187.18408203125,
"completions/mean_terminated_length": 172.0759704589844,
"completions/min_length": 8.8,
"completions/min_terminated_length": 8.8,
"epoch": 0.048,
"grad_norm": 0.3801707625389099,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0454,
"num_tokens": 52250079.0,
"reward": 0.874800705909729,
"reward_std": 0.349214905500412,
"rewards/accuracy_reward": 0.3330078125,
"rewards/brier_reward": 0.5289363861083984,
"rewards/confidence_uniqueness_reward": 0.6511791348457336,
"rewards/format_reward": 0.90400390625,
"rewards/frontier_aurc_reward": 0.434600293636322,
"rewards/frontier_coverage_1": 0.434600293636322,
"rewards/frontier_coverage_10": 0.434600293636322,
"rewards/frontier_coverage_15": 0.434600293636322,
"rewards/frontier_coverage_20": 0.434600293636322,
"rewards/frontier_coverage_25": 0.434600293636322,
"rewards/frontier_coverage_5": 0.434600293636322,
"rewards/frontier_ece_reward": 0.434600293636322,
"signal/accuracy_reward/centered_abs_mean": 0.21435546875,
"signal/accuracy_reward/group_std_mean": 0.2680795192718506,
"signal/accuracy_reward/group_zero_std_frac": 0.3,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.107177734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.107177734375,
"signal/advantage_abs_mean": 0.2617394238710403,
"signal/advantage_pre_scale_abs_mean": 0.2617394238710403,
"signal/advantage_pre_scale_std": 0.3631041467189789,
"signal/advantage_std": 0.3631041467189789,
"signal/brier_reward/centered_abs_mean": 0.26632643938064576,
"signal/brier_reward/group_std_mean": 0.32310367822647096,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03329080492258072,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03329080492258072,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.18657545149326324,
"signal/confidence_uniqueness_reward/group_std_mean": 0.24681947529315948,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023321931436657905,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023321931436657905,
"signal/format_reward/centered_abs_mean": 0.161785888671875,
"signal/format_reward/group_std_mean": 0.25919924676418304,
"signal/format_reward/group_zero_std_frac": 0.125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0808929443359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0808929443359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.26505063772201537,
"signal/frontier_aurc_reward/group_std_mean": 0.3231291711330414,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_1/centered_abs_mean": 0.26505063772201537,
"signal/frontier_coverage_1/group_std_mean": 0.3231291711330414,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_10/centered_abs_mean": 0.26505063772201537,
"signal/frontier_coverage_10/group_std_mean": 0.3231291711330414,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_15/centered_abs_mean": 0.26505063772201537,
"signal/frontier_coverage_15/group_std_mean": 0.3231291711330414,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_20/centered_abs_mean": 0.26505063772201537,
"signal/frontier_coverage_20/group_std_mean": 0.3231291711330414,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_25/centered_abs_mean": 0.26505063772201537,
"signal/frontier_coverage_25/group_std_mean": 0.3231291711330414,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_5/centered_abs_mean": 0.26505063772201537,
"signal/frontier_coverage_5/group_std_mean": 0.3231291711330414,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0047444062307477,
"signal/frontier_ece_reward/centered_abs_mean": 0.26505063772201537,
"signal/frontier_ece_reward/group_std_mean": 0.3231291711330414,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03313132971525192,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03313132971525192,
"step": 15
},
{
"calibration/aurc": 0.4569322433246872,
"calibration/batch_distribution_entropy": 0.701901124719379,
"calibration/confidence_entropy": 0.368413807477986,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.14661354581673308,
"calibration/coverage@30%": 0.26693227091633465,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.31210808530583545,
"calibration/mean_confidence": 0.7689342707600968,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00302734375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 980.0,
"completions/mean_length": 133.61025390625,
"completions/mean_terminated_length": 129.3588897705078,
"completions/min_length": 26.4,
"completions/min_terminated_length": 26.4,
"epoch": 0.064,
"grad_norm": 0.03475378826260567,
"learning_rate": 1e-06,
"loss": 0.0134,
"num_tokens": 68536648.0,
"reward": 0.9799639225006104,
"reward_std": 0.26246568858623504,
"rewards/accuracy_reward": 0.39072265625,
"rewards/brier_reward": 0.6048341035842896,
"rewards/confidence_uniqueness_reward": 0.738007652759552,
"rewards/format_reward": 0.98427734375,
"rewards/frontier_aurc_reward": 0.4978376030921936,
"rewards/frontier_coverage_1": 0.4978376030921936,
"rewards/frontier_coverage_10": 0.4978376030921936,
"rewards/frontier_coverage_15": 0.4978376030921936,
"rewards/frontier_coverage_20": 0.4978376030921936,
"rewards/frontier_coverage_25": 0.4978376030921936,
"rewards/frontier_coverage_5": 0.4978376030921936,
"rewards/frontier_ece_reward": 0.4978376030921936,
"signal/accuracy_reward/centered_abs_mean": 0.214776611328125,
"signal/accuracy_reward/group_std_mean": 0.2681283473968506,
"signal/accuracy_reward/group_zero_std_frac": 0.296875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1073883056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1073883056640625,
"signal/advantage_abs_mean": 0.20265749394893645,
"signal/advantage_pre_scale_abs_mean": 0.20265749394893645,
"signal/advantage_pre_scale_std": 0.2796101540327072,
"signal/advantage_std": 0.2796101540327072,
"signal/brier_reward/centered_abs_mean": 0.23721030354499817,
"signal/brier_reward/group_std_mean": 0.2935959815979004,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02965128794312477,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02965128794312477,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.12453396171331406,
"signal/confidence_uniqueness_reward/group_std_mean": 0.15769868493080139,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015566745214164257,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015566745214164257,
"signal/format_reward/centered_abs_mean": 0.029925537109375,
"signal/format_reward/group_std_mean": 0.07638685405254364,
"signal/format_reward/group_zero_std_frac": 0.609375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0149627685546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0149627685546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2561593323945999,
"signal/frontier_aurc_reward/group_std_mean": 0.3122838795185089,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_1/centered_abs_mean": 0.2561593323945999,
"signal/frontier_coverage_1/group_std_mean": 0.3122838795185089,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_10/centered_abs_mean": 0.2561593323945999,
"signal/frontier_coverage_10/group_std_mean": 0.3122838795185089,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_15/centered_abs_mean": 0.2561593323945999,
"signal/frontier_coverage_15/group_std_mean": 0.3122838795185089,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_20/centered_abs_mean": 0.2561593323945999,
"signal/frontier_coverage_20/group_std_mean": 0.3122838795185089,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_25/centered_abs_mean": 0.2561593323945999,
"signal/frontier_coverage_25/group_std_mean": 0.3122838795185089,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_5/centered_abs_mean": 0.2561593323945999,
"signal/frontier_coverage_5/group_std_mean": 0.3122838795185089,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0045852516777813435,
"signal/frontier_ece_reward/centered_abs_mean": 0.2561593323945999,
"signal/frontier_ece_reward/group_std_mean": 0.3122838795185089,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03201991654932499,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03201991654932499,
"step": 20
},
{
"calibration/aurc": 0.5084255341898481,
"calibration/batch_distribution_entropy": 0.8164247601318154,
"calibration/buffer_distribution_entropy": 0.7090379807345808,
"calibration/confidence_entropy": 0.45600894852795226,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.029133858267716535,
"calibration/coverage@30%": 0.07401574803149606,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3110133148570779,
"calibration/mean_confidence": 0.6897827998864032,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00078125,
"completions/max_length": 1409.6,
"completions/max_terminated_length": 799.0,
"completions/mean_length": 118.04287109375,
"completions/mean_terminated_length": 116.93400421142579,
"completions/min_length": 38.8,
"completions/min_terminated_length": 38.8,
"epoch": 0.08,
"grad_norm": 0.00825721025466919,
"learning_rate": 1e-06,
"loss": 0.0023,
"num_tokens": 84678559.0,
"reward": 0.9916581392288208,
"reward_std": 0.19244107306003572,
"rewards/accuracy_reward": 0.4255859375,
"rewards/brier_reward": 0.6843536019325256,
"rewards/confidence_uniqueness_reward": 0.817476212978363,
"rewards/format_reward": 0.996484375,
"rewards/frontier_aurc_reward": 0.3578193149529397,
"rewards/frontier_coverage_1": 0.3889893189072609,
"rewards/frontier_coverage_10": 0.3889893189072609,
"rewards/frontier_coverage_15": 0.3889893189072609,
"rewards/frontier_coverage_20": 0.3889893189072609,
"rewards/frontier_coverage_25": 0.3889893189072609,
"rewards/frontier_coverage_5": 0.3889893189072609,
"rewards/frontier_ece_reward": 0.3576948957517743,
"signal/accuracy_reward/centered_abs_mean": 0.18526611328125,
"signal/accuracy_reward/group_std_mean": 0.23501957356929778,
"signal/accuracy_reward/group_zero_std_frac": 0.36875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.092633056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.092633056640625,
"signal/advantage_abs_mean": 0.1510450452566147,
"signal/advantage_pre_scale_abs_mean": 0.1510450452566147,
"signal/advantage_pre_scale_std": 0.21218505203723909,
"signal/advantage_std": 0.21218505203723909,
"signal/brier_reward/centered_abs_mean": 0.2015215128660202,
"signal/brier_reward/group_std_mean": 0.25240307450294497,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025190189108252527,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.025190189108252527,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08064173310995101,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10429088771343231,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010080216638743877,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010080216638743877,
"signal/format_reward/centered_abs_mean": 0.00677490234375,
"signal/format_reward/group_std_mean": 0.01887845266610384,
"signal/format_reward/group_zero_std_frac": 0.896875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003387451171875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003387451171875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.1544154985807836,
"signal/frontier_aurc_reward/group_std_mean": 0.18873186707496642,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.002764037343149539,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.002764037343149539,
"signal/frontier_coverage_1/centered_abs_mean": 0.21573287844657899,
"signal/frontier_coverage_1/group_std_mean": 0.27374354004859924,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_10/centered_abs_mean": 0.21573287844657899,
"signal/frontier_coverage_10/group_std_mean": 0.27374354004859924,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_15/centered_abs_mean": 0.21573287844657899,
"signal/frontier_coverage_15/group_std_mean": 0.27374354004859924,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_20/centered_abs_mean": 0.21573287844657899,
"signal/frontier_coverage_20/group_std_mean": 0.27374354004859924,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_25/centered_abs_mean": 0.21573287844657899,
"signal/frontier_coverage_25/group_std_mean": 0.27374354004859924,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_5/centered_abs_mean": 0.21573287844657899,
"signal/frontier_coverage_5/group_std_mean": 0.27374354004859924,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038616183679550885,
"signal/frontier_ece_reward/centered_abs_mean": 0.1824594885110855,
"signal/frontier_ece_reward/group_std_mean": 0.22269095629453659,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02280743606388569,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02280743606388569,
"step": 25
},
{
"calibration/aurc": 0.6451447340341319,
"calibration/batch_distribution_entropy": 0.8836254438118136,
"calibration/buffer_distribution_entropy": 0.7536336258740739,
"calibration/confidence_entropy": 0.5477458249391652,
"calibration/coverage@0%": 0.00234375,
"calibration/coverage@1%": 0.00234375,
"calibration/coverage@10%": 0.00234375,
"calibration/coverage@15%": 0.00234375,
"calibration/coverage@20%": 0.00234375,
"calibration/coverage@25%": 0.003125,
"calibration/coverage@30%": 0.003125,
"calibration/coverage@5%": 0.00234375,
"calibration/ece": 0.2766606577437428,
"calibration/mean_confidence": 0.5350011617572232,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 864.6,
"completions/max_terminated_length": 464.4,
"completions/mean_length": 126.6078125,
"completions/mean_terminated_length": 126.33264770507813,
"completions/min_length": 41.4,
"completions/min_terminated_length": 41.4,
"epoch": 0.096,
"grad_norm": 0.007818322628736496,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 101019631.0,
"reward": 0.9390641927719117,
"reward_std": 0.1480298787355423,
"rewards/accuracy_reward": 0.4728515625,
"rewards/brier_reward": 0.7311343312263489,
"rewards/confidence_uniqueness_reward": 0.8339264154434204,
"rewards/format_reward": 0.99755859375,
"rewards/frontier_aurc_reward": -0.004495029617100954,
"rewards/frontier_coverage_1": 0.06669748276472091,
"rewards/frontier_coverage_10": 0.06669748276472091,
"rewards/frontier_coverage_15": 0.06669748276472091,
"rewards/frontier_coverage_20": 0.06669748276472091,
"rewards/frontier_coverage_25": 0.06669748276472091,
"rewards/frontier_coverage_5": 0.06669748276472091,
"rewards/frontier_ece_reward": 0.009149301517754792,
"signal/accuracy_reward/centered_abs_mean": 0.17642822265625,
"signal/accuracy_reward/group_std_mean": 0.22929745614528657,
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.088214111328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.088214111328125,
"signal/advantage_abs_mean": 0.11449322551488876,
"signal/advantage_pre_scale_abs_mean": 0.11449322551488876,
"signal/advantage_pre_scale_std": 0.16401045322418212,
"signal/advantage_std": 0.16401045322418212,
"signal/brier_reward/centered_abs_mean": 0.18242722153663635,
"signal/brier_reward/group_std_mean": 0.23005988895893098,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022803402692079543,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.022803402692079543,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08953404575586318,
"signal/confidence_uniqueness_reward/group_std_mean": 0.1142925649881363,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011191755719482898,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011191755719482898,
"signal/format_reward/centered_abs_mean": 0.004730224609375,
"signal/format_reward/group_std_mean": 0.013810678757727146,
"signal/format_reward/group_zero_std_frac": 0.921875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0023651123046875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0023651123046875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024468526942655446,
"signal/frontier_aurc_reward/group_std_mean": 0.003993393434211612,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.379866222734563e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.379866222734563e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1959122210741043,
"signal/frontier_coverage_1/group_std_mean": 0.26175145506858827,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_10/centered_abs_mean": 0.1959122210741043,
"signal/frontier_coverage_10/group_std_mean": 0.26175145506858827,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_15/centered_abs_mean": 0.1959122210741043,
"signal/frontier_coverage_15/group_std_mean": 0.26175145506858827,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_20/centered_abs_mean": 0.1959122210741043,
"signal/frontier_coverage_20/group_std_mean": 0.26175145506858827,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_25/centered_abs_mean": 0.1959122210741043,
"signal/frontier_coverage_25/group_std_mean": 0.26175145506858827,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_5/centered_abs_mean": 0.1959122210741043,
"signal/frontier_coverage_5/group_std_mean": 0.26175145506858827,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003506828611716628,
"signal/frontier_ece_reward/centered_abs_mean": 0.06938310116529464,
"signal/frontier_ece_reward/group_std_mean": 0.08558708280324936,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00867288764566183,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00867288764566183,
"step": 30
},
{
"calibration/aurc": 0.3086294818860637,
"calibration/batch_distribution_entropy": 0.8961884358507668,
"calibration/buffer_distribution_entropy": 0.8133524081442107,
"calibration/confidence_entropy": 0.5430245461216926,
"calibration/coverage@0%": 0.009375,
"calibration/coverage@1%": 0.009375,
"calibration/coverage@10%": 0.06875,
"calibration/coverage@15%": 0.10546875,
"calibration/coverage@20%": 0.19140625,
"calibration/coverage@25%": 0.37265625,
"calibration/coverage@30%": 0.53671875,
"calibration/coverage@5%": 0.025,
"calibration/ece": 0.15086028370235913,
"calibration/mean_confidence": 0.46044110662133236,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1082.4,
"completions/max_terminated_length": 397.4,
"completions/mean_length": 135.834375,
"completions/mean_terminated_length": 135.4240753173828,
"completions/min_length": 49.2,
"completions/min_terminated_length": 49.2,
"epoch": 0.112,
"grad_norm": 0.004440602846443653,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 117520047.0,
"reward": 0.9525650858879089,
"reward_std": 0.12223374545574188,
"rewards/accuracy_reward": 0.48525390625,
"rewards/brier_reward": 0.7479133129119873,
"rewards/confidence_uniqueness_reward": 0.8512405276298523,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.003925839858129621,
"rewards/frontier_coverage_1": 0.08381552398204803,
"rewards/frontier_coverage_10": 0.08381552398204803,
"rewards/frontier_coverage_15": 0.08381552398204803,
"rewards/frontier_coverage_20": 0.08381552398204803,
"rewards/frontier_coverage_25": 0.08381552398204803,
"rewards/frontier_coverage_5": 0.08381552398204803,
"rewards/frontier_ece_reward": 0.012414590083062648,
"signal/accuracy_reward/centered_abs_mean": 0.167669677734375,
"signal/accuracy_reward/group_std_mean": 0.21841561794281006,
"signal/accuracy_reward/group_zero_std_frac": 0.390625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0838348388671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0838348388671875,
"signal/advantage_abs_mean": 0.09548963457345963,
"signal/advantage_pre_scale_abs_mean": 0.09548963457345963,
"signal/advantage_pre_scale_std": 0.13613282144069672,
"signal/advantage_std": 0.13613282144069672,
"signal/brier_reward/centered_abs_mean": 0.1708603948354721,
"signal/brier_reward/group_std_mean": 0.21544656455516814,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021357549354434013,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021357549354434013,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08106829673051834,
"signal/confidence_uniqueness_reward/group_std_mean": 0.09855363517999649,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010133537091314792,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010133537091314792,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_std_mean": 0.004971844423562288,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0016749128932133316,
"signal/frontier_aurc_reward/group_std_mean": 0.002730554435402155,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.998093877977226e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.998093877977226e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.24808040857315064,
"signal/frontier_coverage_1/group_std_mean": 0.3155758440494537,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_10/centered_abs_mean": 0.24808040857315064,
"signal/frontier_coverage_10/group_std_mean": 0.3155758440494537,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_15/centered_abs_mean": 0.24808040857315064,
"signal/frontier_coverage_15/group_std_mean": 0.3155758440494537,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_20/centered_abs_mean": 0.24808040857315064,
"signal/frontier_coverage_20/group_std_mean": 0.3155758440494537,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_25/centered_abs_mean": 0.24808040857315064,
"signal/frontier_coverage_25/group_std_mean": 0.3155758440494537,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_5/centered_abs_mean": 0.24808040857315064,
"signal/frontier_coverage_5/group_std_mean": 0.3155758440494537,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044406389351934195,
"signal/frontier_ece_reward/centered_abs_mean": 0.04592524915933609,
"signal/frontier_ece_reward/group_std_mean": 0.06059465631842613,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005740656144917011,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005740656144917011,
"step": 35
},
{
"calibration/aurc": 0.4580176944367551,
"calibration/batch_distribution_entropy": 0.8535035360352362,
"calibration/buffer_distribution_entropy": 0.8659183215624904,
"calibration/confidence_entropy": 0.519083178935934,
"calibration/coverage@0%": 0.007815563725490197,
"calibration/coverage@1%": 0.007815563725490197,
"calibration/coverage@10%": 0.03203431372549019,
"calibration/coverage@15%": 0.06328431372549019,
"calibration/coverage@20%": 0.11484681372549019,
"calibration/coverage@25%": 0.1296905637254902,
"calibration/coverage@30%": 0.2289093137254902,
"calibration/coverage@5%": 0.007815563725490197,
"calibration/ece": 0.1627333490117647,
"calibration/mean_confidence": 0.3436108605470588,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 912.4,
"completions/max_terminated_length": 448.8,
"completions/mean_length": 149.380859375,
"completions/mean_terminated_length": 149.11015625,
"completions/min_length": 65.4,
"completions/min_terminated_length": 65.4,
"epoch": 0.128,
"grad_norm": 0.003327795770019293,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 133966379.0,
"reward": 0.9490593194961547,
"reward_std": 0.1036192610859871,
"rewards/accuracy_reward": 0.47001953125,
"rewards/brier_reward": 0.7549473881721497,
"rewards/confidence_uniqueness_reward": 0.8515770912170411,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0035853940062224865,
"rewards/frontier_coverage_1": 0.11151133924722671,
"rewards/frontier_coverage_10": 0.11151133924722671,
"rewards/frontier_coverage_15": 0.11151133924722671,
"rewards/frontier_coverage_20": 0.11151133924722671,
"rewards/frontier_coverage_25": 0.11151133924722671,
"rewards/frontier_coverage_5": 0.11151133924722671,
"rewards/frontier_ece_reward": 0.013309185951948166,
"signal/accuracy_reward/centered_abs_mean": 0.142132568359375,
"signal/accuracy_reward/group_std_mean": 0.19049813449382783,
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0710662841796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0710662841796875,
"signal/advantage_abs_mean": 0.08017075657844544,
"signal/advantage_pre_scale_abs_mean": 0.08017075657844544,
"signal/advantage_pre_scale_std": 0.11746061593294144,
"signal/advantage_std": 0.11746061593294144,
"signal/brier_reward/centered_abs_mean": 0.15917613804340364,
"signal/brier_reward/group_std_mean": 0.20062560141086577,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019897017255425455,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019897017255425455,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08653065264225006,
"signal/confidence_uniqueness_reward/group_std_mean": 0.11002808213233947,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010816331580281257,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010816331580281257,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_std_mean": 0.0038669900968670845,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012066281167790293,
"signal/frontier_aurc_reward/group_std_mean": 0.0019432639004662634,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1598641978926025e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1598641978926025e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2550701230764389,
"signal/frontier_coverage_1/group_std_mean": 0.3213431596755981,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_10/centered_abs_mean": 0.2550701230764389,
"signal/frontier_coverage_10/group_std_mean": 0.3213431596755981,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_15/centered_abs_mean": 0.2550701230764389,
"signal/frontier_coverage_15/group_std_mean": 0.3213431596755981,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_20/centered_abs_mean": 0.2550701230764389,
"signal/frontier_coverage_20/group_std_mean": 0.3213431596755981,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_25/centered_abs_mean": 0.2550701230764389,
"signal/frontier_coverage_25/group_std_mean": 0.3213431596755981,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_5/centered_abs_mean": 0.2550701230764389,
"signal/frontier_coverage_5/group_std_mean": 0.3213431596755981,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004565754998475313,
"signal/frontier_ece_reward/centered_abs_mean": 0.03292861394584179,
"signal/frontier_ece_reward/group_std_mean": 0.04621725678443909,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004116076743230224,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004116076743230224,
"step": 40
},
{
"calibration/aurc": 0.2264348749109058,
"calibration/batch_distribution_entropy": 0.947362197850653,
"calibration/buffer_distribution_entropy": 0.9023169704024638,
"calibration/confidence_entropy": 0.5164703875793273,
"calibration/coverage@0%": 0.0375,
"calibration/coverage@1%": 0.0375,
"calibration/coverage@10%": 0.14453125,
"calibration/coverage@15%": 0.29140625,
"calibration/coverage@20%": 0.46796875,
"calibration/coverage@25%": 0.7109375,
"calibration/coverage@30%": 0.76171875,
"calibration/coverage@5%": 0.06953125,
"calibration/ece": 0.293532578125,
"calibration/mean_confidence": 0.41213460937499996,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1085.0,
"completions/max_terminated_length": 383.8,
"completions/mean_length": 155.62353515625,
"completions/mean_terminated_length": 155.21868896484375,
"completions/min_length": 67.2,
"completions/min_terminated_length": 67.2,
"epoch": 0.144,
"grad_norm": 0.0031078618485480547,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 150510396.0,
"reward": 1.000108528137207,
"reward_std": 0.10603977590799332,
"rewards/accuracy_reward": 0.58642578125,
"rewards/brier_reward": 0.7386995673179626,
"rewards/confidence_uniqueness_reward": 0.8767276644706726,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0029010240454226733,
"rewards/frontier_coverage_1": 0.026493354281410576,
"rewards/frontier_coverage_10": 0.026493354281410576,
"rewards/frontier_coverage_15": 0.026493354281410576,
"rewards/frontier_coverage_20": 0.026493354281410576,
"rewards/frontier_coverage_25": 0.026493354281410576,
"rewards/frontier_coverage_5": 0.026493354281410576,
"rewards/frontier_ece_reward": 0.018952517956495284,
"signal/accuracy_reward/centered_abs_mean": 0.150006103515625,
"signal/accuracy_reward/group_std_mean": 0.19647997319698335,
"signal/accuracy_reward/group_zero_std_frac": 0.446875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0750030517578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0750030517578125,
"signal/advantage_abs_mean": 0.08291901051998138,
"signal/advantage_pre_scale_abs_mean": 0.08291901051998138,
"signal/advantage_pre_scale_std": 0.11942969560623169,
"signal/advantage_std": 0.11942969560623169,
"signal/brier_reward/centered_abs_mean": 0.17461107671260834,
"signal/brier_reward/group_std_mean": 0.21828512847423553,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021826384589076042,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021826384589076042,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.06542369574308396,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08328969031572342,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008177961967885495,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008177961967885495,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014980694744735957,
"signal/frontier_aurc_reward/group_std_mean": 0.0023212187923491003,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6815443561645225e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6815443561645225e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2612448215484619,
"signal/frontier_coverage_1/group_std_mean": 0.3272443234920502,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_10/centered_abs_mean": 0.2612448215484619,
"signal/frontier_coverage_10/group_std_mean": 0.3272443234920502,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_15/centered_abs_mean": 0.2612448215484619,
"signal/frontier_coverage_15/group_std_mean": 0.3272443234920502,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_20/centered_abs_mean": 0.2612448215484619,
"signal/frontier_coverage_20/group_std_mean": 0.3272443234920502,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_25/centered_abs_mean": 0.2612448215484619,
"signal/frontier_coverage_25/group_std_mean": 0.3272443234920502,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_5/centered_abs_mean": 0.2612448215484619,
"signal/frontier_coverage_5/group_std_mean": 0.3272443234920502,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004676282219588756,
"signal/frontier_ece_reward/centered_abs_mean": 0.039357250183820726,
"signal/frontier_ece_reward/group_std_mean": 0.05260428786277771,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004919656272977591,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004919656272977591,
"step": 45
},
{
"calibration/aurc": 0.33544708568653764,
"calibration/batch_distribution_entropy": 0.9641692497219696,
"calibration/buffer_distribution_entropy": 0.9233838203150402,
"calibration/confidence_entropy": 0.4748932057775385,
"calibration/coverage@0%": 0.0046875,
"calibration/coverage@1%": 0.0046875,
"calibration/coverage@10%": 0.03671875,
"calibration/coverage@15%": 0.15174938725490197,
"calibration/coverage@20%": 0.2479810049019608,
"calibration/coverage@25%": 0.2894546568627451,
"calibration/coverage@30%": 0.34893688725490196,
"calibration/coverage@5%": 0.0046875,
"calibration/ece": 0.14829035447303923,
"calibration/mean_confidence": 0.46134567493872547,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 742.4,
"completions/max_terminated_length": 523.4,
"completions/mean_length": 163.4974609375,
"completions/mean_terminated_length": 163.36328125,
"completions/min_length": 75.8,
"completions/min_terminated_length": 75.8,
"epoch": 0.16,
"grad_norm": 0.0028037051670253277,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 167205538.0,
"reward": 0.9897387862205506,
"reward_std": 0.11076341718435287,
"rewards/accuracy_reward": 0.5359375,
"rewards/brier_reward": 0.7786368131637573,
"rewards/confidence_uniqueness_reward": 0.88655526638031,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.003066997462883592,
"rewards/frontier_coverage_1": 0.09699134379625321,
"rewards/frontier_coverage_10": 0.09699134379625321,
"rewards/frontier_coverage_15": 0.09699134379625321,
"rewards/frontier_coverage_20": 0.09699134379625321,
"rewards/frontier_coverage_25": 0.09699134379625321,
"rewards/frontier_coverage_5": 0.09699134379625321,
"rewards/frontier_ece_reward": 0.026462964341044427,
"signal/accuracy_reward/centered_abs_mean": 0.14306640625,
"signal/accuracy_reward/group_std_mean": 0.1878939002752304,
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071533203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.071533203125,
"signal/advantage_abs_mean": 0.08609444797039031,
"signal/advantage_pre_scale_abs_mean": 0.08609444797039031,
"signal/advantage_pre_scale_std": 0.12765211164951323,
"signal/advantage_std": 0.12765211164951323,
"signal/brier_reward/centered_abs_mean": 0.16862273216247559,
"signal/brier_reward/group_std_mean": 0.2126835286617279,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021077841520309448,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021077841520309448,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.056052202731370925,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06837449967861176,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070065253414213656,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070065253414213656,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002428090269677341,
"signal/frontier_aurc_reward/group_std_mean": 0.0038595238234847783,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.346281566540711e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.346281566540711e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22630979716777802,
"signal/frontier_coverage_1/group_std_mean": 0.2893765389919281,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_10/centered_abs_mean": 0.22630979716777802,
"signal/frontier_coverage_10/group_std_mean": 0.2893765389919281,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_15/centered_abs_mean": 0.22630979716777802,
"signal/frontier_coverage_15/group_std_mean": 0.2893765389919281,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_20/centered_abs_mean": 0.22630979716777802,
"signal/frontier_coverage_20/group_std_mean": 0.2893765389919281,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_25/centered_abs_mean": 0.22630979716777802,
"signal/frontier_coverage_25/group_std_mean": 0.2893765389919281,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_5/centered_abs_mean": 0.22630979716777802,
"signal/frontier_coverage_5/group_std_mean": 0.2893765389919281,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004050945350900293,
"signal/frontier_ece_reward/centered_abs_mean": 0.04531662836670876,
"signal/frontier_ece_reward/group_std_mean": 0.058810415863990786,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005664578545838595,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005664578545838595,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.4997777285356147,
"eval_calibration/batch_distribution_entropy": 0.9348213202225528,
"eval_calibration/buffer_distribution_entropy": 0.9305712700671265,
"eval_calibration/confidence_entropy": 0.47102110753245985,
"eval_calibration/coverage@0%": 0.046875,
"eval_calibration/coverage@1%": 0.046875,
"eval_calibration/coverage@10%": 0.046875,
"eval_calibration/coverage@15%": 0.046875,
"eval_calibration/coverage@20%": 0.046875,
"eval_calibration/coverage@25%": 0.0625,
"eval_calibration/coverage@30%": 0.234375,
"eval_calibration/coverage@5%": 0.046875,
"eval_calibration/ece": 0.281164314516129,
"eval_calibration/mean_confidence": 0.5592389112903225,
"eval_completions/clipped_ratio": 0.002049180327868827,
"eval_completions/max_length": 911.5,
"eval_completions/max_terminated_length": 311.5,
"eval_completions/mean_length": 170.20043182373047,
"eval_completions/mean_terminated_length": 167.39978790283203,
"eval_completions/min_length": 84.0,
"eval_completions/min_terminated_length": 84.0,
"eval_loss": 0.0,
"eval_num_tokens": 167205538.0,
"eval_reward": 0.90069180727005,
"eval_reward_std": 0.23488686978816986,
"eval_rewards/accuracy_reward": 0.361328125,
"eval_rewards/brier_reward": 0.7528277337551117,
"eval_rewards/confidence_uniqueness_reward": 0.8379772901535034,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.005042638164013624,
"eval_rewards/frontier_coverage_1": 0.18974924087524414,
"eval_rewards/frontier_coverage_10": 0.18974924087524414,
"eval_rewards/frontier_coverage_15": 0.18974924087524414,
"eval_rewards/frontier_coverage_20": 0.18974924087524414,
"eval_rewards/frontier_coverage_25": 0.18974924087524414,
"eval_rewards/frontier_coverage_5": 0.18974924087524414,
"eval_rewards/frontier_ece_reward": 0.01491912454366684,
"eval_runtime": 19.3233,
"eval_samples_per_second": 25.876,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4456787109375,
"eval_signal/accuracy_reward/group_std_mean": 0.47858355939388275,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22283935546875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22283935546875,
"eval_signal/advantage_abs_mean": 0.20646706968545914,
"eval_signal/advantage_pre_scale_abs_mean": 0.20646706968545914,
"eval_signal/advantage_pre_scale_std": 0.23231954872608185,
"eval_signal/advantage_std": 0.23231954872608185,
"eval_signal/brier_reward/centered_abs_mean": 0.22418075799942017,
"eval_signal/brier_reward/group_std_mean": 0.2734896242618561,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02802259474992752,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02802259474992752,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07463713735342026,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09528587758541107,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009329642169177532,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009329642169177532,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004753857152536511,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007068477105349302,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.50940377858933e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.50940377858933e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.31350038945674896,
"eval_signal/frontier_coverage_1/group_std_mean": 0.39658913016319275,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.31350038945674896,
"eval_signal/frontier_coverage_10/group_std_mean": 0.39658913016319275,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.31350038945674896,
"eval_signal/frontier_coverage_15/group_std_mean": 0.39658913016319275,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.31350038945674896,
"eval_signal/frontier_coverage_20/group_std_mean": 0.39658913016319275,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.31350038945674896,
"eval_signal/frontier_coverage_25/group_std_mean": 0.39658913016319275,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.31350038945674896,
"eval_signal/frontier_coverage_5/group_std_mean": 0.39658913016319275,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005611656466498971,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.05817369371652603,
"eval_signal/frontier_ece_reward/group_std_mean": 0.08309631422162056,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007271711714565754,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007271711714565754,
"eval_steps_per_second": 0.104,
"step": 50
},
{
"epoch": 0.16,
"step": 50,
"train_probe_calibration/aurc": 0.2664804729817548,
"train_probe_calibration/batch_distribution_entropy": 0.9324186885257131,
"train_probe_calibration/buffer_distribution_entropy": 0.9309467058219747,
"train_probe_calibration/confidence_entropy": 0.441220735421333,
"train_probe_calibration/coverage@0%": 0.03125,
"train_probe_calibration/coverage@1%": 0.03125,
"train_probe_calibration/coverage@10%": 0.03125,
"train_probe_calibration/coverage@15%": 0.46875,
"train_probe_calibration/coverage@20%": 0.484375,
"train_probe_calibration/coverage@25%": 0.578125,
"train_probe_calibration/coverage@30%": 0.640625,
"train_probe_calibration/coverage@5%": 0.03125,
"train_probe_calibration/ece": 0.17640625,
"train_probe_calibration/mean_confidence": 0.57484375,
"train_probe_completions/clipped_ratio": 0.0,
"train_probe_completions/max_length": 366.0,
"train_probe_completions/max_terminated_length": 366.0,
"train_probe_completions/mean_length": 165.32955932617188,
"train_probe_completions/mean_terminated_length": 165.32955932617188,
"train_probe_completions/min_length": 66.0,
"train_probe_completions/min_terminated_length": 66.0,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 167205538.0,
"train_probe_reward": 1.0041134357452393,
"train_probe_reward_std": 0.2359241172671318,
"train_probe_rewards/accuracy_reward": 0.57421875,
"train_probe_rewards/brier_reward": 0.7899730503559113,
"train_probe_rewards/confidence_uniqueness_reward": 0.839599609375,
"train_probe_rewards/format_reward": 1.0,
"train_probe_rewards/frontier_aurc_reward": -0.0027748874854296446,
"train_probe_rewards/frontier_coverage_1": 0.08121992275118828,
"train_probe_rewards/frontier_coverage_10": 0.08121992275118828,
"train_probe_rewards/frontier_coverage_15": 0.08121992275118828,
"train_probe_rewards/frontier_coverage_20": 0.08121992275118828,
"train_probe_rewards/frontier_coverage_25": 0.08121992275118828,
"train_probe_rewards/frontier_coverage_5": 0.08121992275118828,
"train_probe_rewards/frontier_ece_reward": 0.03707304783165455,
"train_probe_runtime": 9.186,
"train_probe_samples_per_second": 54.43,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.48193359375,
"train_probe_signal/accuracy_reward/group_std_mean": 0.49865010380744934,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.240966796875,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.240966796875,
"train_probe_signal/advantage_abs_mean": 0.2185721918940544,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.2185721918940544,
"train_probe_signal/advantage_pre_scale_std": 0.23293063789606094,
"train_probe_signal/advantage_std": 0.23293063789606094,
"train_probe_signal/brier_reward/centered_abs_mean": 0.20220646262168884,
"train_probe_signal/brier_reward/group_std_mean": 0.24830978363752365,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025275807827711105,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.025275807827711105,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.076904296875,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.09100573509931564,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009613037109375,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009613037109375,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.004000097163952887,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.00605845358222723,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.160173117881641e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.160173117881641e-05,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30071887373924255,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4058589041233063,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30071887373924255,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.4058589041233063,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.30071887373924255,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.4058589041233063,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.30071887373924255,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.4058589041233063,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.30071887373924255,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.4058589041233063,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30071887373924255,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4058589041233063,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0053828677628189325,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.06303473375737667,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.08334130793809891,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007879341719672084,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007879341719672084,
"train_probe_steps_per_second": 0.218
},
{
"calibration/aurc": 0.2950848289576411,
"calibration/batch_distribution_entropy": 0.930281332331097,
"calibration/buffer_distribution_entropy": 0.9341737996692416,
"calibration/confidence_entropy": 0.4755722991194964,
"calibration/coverage@0%": 0.01484375,
"calibration/coverage@1%": 0.01484375,
"calibration/coverage@10%": 0.16171875,
"calibration/coverage@15%": 0.2046875,
"calibration/coverage@20%": 0.2859375,
"calibration/coverage@25%": 0.3765625,
"calibration/coverage@30%": 0.5828125,
"calibration/coverage@5%": 0.10234375,
"calibration/ece": 0.1771126289368873,
"calibration/mean_confidence": 0.5750459494944853,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1084.8,
"completions/max_terminated_length": 497.4,
"completions/mean_length": 168.68369140625,
"completions/mean_terminated_length": 168.2832794189453,
"completions/min_length": 78.0,
"completions/min_terminated_length": 78.0,
"epoch": 0.176,
"grad_norm": 0.003821933874860406,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 184169979.0,
"reward": 0.9811482787132263,
"reward_std": 0.1093181312084198,
"rewards/accuracy_reward": 0.52294921875,
"rewards/brier_reward": 0.7728787422180176,
"rewards/confidence_uniqueness_reward": 0.8689801812171936,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.003453007619827986,
"rewards/frontier_coverage_1": 0.10514852032065392,
"rewards/frontier_coverage_10": 0.10514852032065392,
"rewards/frontier_coverage_15": 0.10514852032065392,
"rewards/frontier_coverage_20": 0.10514852032065392,
"rewards/frontier_coverage_25": 0.10514852032065392,
"rewards/frontier_coverage_5": 0.10514852032065392,
"rewards/frontier_ece_reward": 0.02763434946537018,
"signal/accuracy_reward/centered_abs_mean": 0.128729248046875,
"signal/accuracy_reward/group_std_mean": 0.1696704939007759,
"signal/accuracy_reward/group_zero_std_frac": 0.5125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0643646240234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0643646240234375,
"signal/advantage_abs_mean": 0.0838969498872757,
"signal/advantage_pre_scale_abs_mean": 0.0838969498872757,
"signal/advantage_pre_scale_std": 0.12963834255933762,
"signal/advantage_std": 0.12963834255933762,
"signal/brier_reward/centered_abs_mean": 0.16208215355873107,
"signal/brier_reward/group_std_mean": 0.20663413107395173,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020260269194841383,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020260269194841383,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.07365219593048096,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08795170336961747,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00920652449131012,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00920652449131012,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002859164075925946,
"signal/frontier_aurc_reward/group_std_mean": 0.004245653934776783,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.117903638165444e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.117903638165444e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1915457785129547,
"signal/frontier_coverage_1/group_std_mean": 0.24914441704750062,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_10/centered_abs_mean": 0.1915457785129547,
"signal/frontier_coverage_10/group_std_mean": 0.24914441704750062,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_15/centered_abs_mean": 0.1915457785129547,
"signal/frontier_coverage_15/group_std_mean": 0.24914441704750062,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_20/centered_abs_mean": 0.1915457785129547,
"signal/frontier_coverage_20/group_std_mean": 0.24914441704750062,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_25/centered_abs_mean": 0.1915457785129547,
"signal/frontier_coverage_25/group_std_mean": 0.24914441704750062,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_5/centered_abs_mean": 0.1915457785129547,
"signal/frontier_coverage_5/group_std_mean": 0.24914441704750062,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003428669273853302,
"signal/frontier_ece_reward/centered_abs_mean": 0.04376091659069061,
"signal/frontier_ece_reward/group_std_mean": 0.05602394491434097,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005470114573836326,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005470114573836326,
"step": 55
},
{
"calibration/aurc": 0.331854588779599,
"calibration/batch_distribution_entropy": 0.8745741485620421,
"calibration/buffer_distribution_entropy": 0.9365178507635724,
"calibration/confidence_entropy": 0.4012815190096622,
"calibration/coverage@0%": 0.003125,
"calibration/coverage@1%": 0.003125,
"calibration/coverage@10%": 0.05234375,
"calibration/coverage@15%": 0.140625,
"calibration/coverage@20%": 0.32421875,
"calibration/coverage@25%": 0.3828125,
"calibration/coverage@30%": 0.5078125,
"calibration/coverage@5%": 0.003125,
"calibration/ece": 0.173107109375,
"calibration/mean_confidence": 0.6393742968749999,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 829.0,
"completions/max_terminated_length": 608.8,
"completions/mean_length": 167.47001953125,
"completions/mean_terminated_length": 167.33668518066406,
"completions/min_length": 78.8,
"completions/min_terminated_length": 78.8,
"epoch": 0.192,
"grad_norm": 0.004035938531160355,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 200699688.0,
"reward": 0.9879758596420288,
"reward_std": 0.11625557094812393,
"rewards/accuracy_reward": 0.53916015625,
"rewards/brier_reward": 0.7739283800125122,
"rewards/confidence_uniqueness_reward": 0.8580935597419739,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0036836853716522453,
"rewards/frontier_coverage_1": 0.10152835100889206,
"rewards/frontier_coverage_10": 0.10152835100889206,
"rewards/frontier_coverage_15": 0.10152835100889206,
"rewards/frontier_coverage_20": 0.10152835100889206,
"rewards/frontier_coverage_25": 0.10152835100889206,
"rewards/frontier_coverage_5": 0.10152835100889206,
"rewards/frontier_ece_reward": 0.030000920966267587,
"signal/accuracy_reward/centered_abs_mean": 0.136065673828125,
"signal/accuracy_reward/group_std_mean": 0.179530268907547,
"signal/accuracy_reward/group_zero_std_frac": 0.484375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0680328369140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0680328369140625,
"signal/advantage_abs_mean": 0.08931153416633605,
"signal/advantage_pre_scale_abs_mean": 0.08931153416633605,
"signal/advantage_pre_scale_std": 0.13905880898237227,
"signal/advantage_std": 0.13905880898237227,
"signal/brier_reward/centered_abs_mean": 0.17017331421375276,
"signal/brier_reward/group_std_mean": 0.21759623885154725,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021271664276719095,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021271664276719095,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09090490639209747,
"signal/confidence_uniqueness_reward/group_std_mean": 0.10824680477380752,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011363113299012184,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011363113299012184,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036389449145644904,
"signal/frontier_aurc_reward/group_std_mean": 0.0054845036007463936,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.513711123261601e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.513711123261601e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17842654287815093,
"signal/frontier_coverage_1/group_std_mean": 0.23917962312698365,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_10/centered_abs_mean": 0.17842654287815093,
"signal/frontier_coverage_10/group_std_mean": 0.23917962312698365,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_15/centered_abs_mean": 0.17842654287815093,
"signal/frontier_coverage_15/group_std_mean": 0.23917962312698365,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_20/centered_abs_mean": 0.17842654287815093,
"signal/frontier_coverage_20/group_std_mean": 0.23917962312698365,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_25/centered_abs_mean": 0.17842654287815093,
"signal/frontier_coverage_25/group_std_mean": 0.23917962312698365,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_5/centered_abs_mean": 0.17842654287815093,
"signal/frontier_coverage_5/group_std_mean": 0.23917962312698365,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003193834982812405,
"signal/frontier_ece_reward/centered_abs_mean": 0.04488262310624123,
"signal/frontier_ece_reward/group_std_mean": 0.056452129036188126,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0056103278882801534,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0056103278882801534,
"step": 60
},
{
"calibration/aurc": 0.32373530576954757,
"calibration/batch_distribution_entropy": 0.8591898290330275,
"calibration/buffer_distribution_entropy": 0.9347868571410448,
"calibration/confidence_entropy": 0.3849134860388972,
"calibration/coverage@0%": 0.00859375,
"calibration/coverage@1%": 0.00859375,
"calibration/coverage@10%": 0.09375,
"calibration/coverage@15%": 0.24765625,
"calibration/coverage@20%": 0.36015625,
"calibration/coverage@25%": 0.546875,
"calibration/coverage@30%": 0.65546875,
"calibration/coverage@5%": 0.00859375,
"calibration/ece": 0.18438471015114383,
"calibration/mean_confidence": 0.6311039438521242,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 873.6,
"completions/max_terminated_length": 432.2,
"completions/mean_length": 166.7802734375,
"completions/mean_terminated_length": 166.3782531738281,
"completions/min_length": 72.8,
"completions/min_terminated_length": 72.8,
"epoch": 0.208,
"grad_norm": 0.0025081464555114508,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 217439742.0,
"reward": 1.0152548551559448,
"reward_std": 0.11149686425924302,
"rewards/accuracy_reward": 0.58408203125,
"rewards/brier_reward": 0.7946485996246337,
"rewards/confidence_uniqueness_reward": 0.8775890946388245,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0030152024235576393,
"rewards/frontier_coverage_1": 0.09387013614177704,
"rewards/frontier_coverage_10": 0.09387013614177704,
"rewards/frontier_coverage_15": 0.09387013614177704,
"rewards/frontier_coverage_20": 0.09387013614177704,
"rewards/frontier_coverage_25": 0.09387013614177704,
"rewards/frontier_coverage_5": 0.09387013614177704,
"rewards/frontier_ece_reward": 0.03520463481545448,
"signal/accuracy_reward/centered_abs_mean": 0.121368408203125,
"signal/accuracy_reward/group_std_mean": 0.1650165855884552,
"signal/accuracy_reward/group_zero_std_frac": 0.515625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0606842041015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0606842041015625,
"signal/advantage_abs_mean": 0.08337271958589554,
"signal/advantage_pre_scale_abs_mean": 0.08337271958589554,
"signal/advantage_pre_scale_std": 0.13434576690196992,
"signal/advantage_std": 0.13434576690196992,
"signal/brier_reward/centered_abs_mean": 0.16763521134853362,
"signal/brier_reward/group_std_mean": 0.21714569628238678,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020954401418566703,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020954401418566703,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.08260471224784852,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0987936407327652,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010325589030981065,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010325589030981065,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003630819218233228,
"signal/frontier_aurc_reward/group_std_mean": 0.005773447826504708,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.499166338471695e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.499166338471695e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17451754212379456,
"signal/frontier_coverage_1/group_std_mean": 0.23370930552482605,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_10/centered_abs_mean": 0.17451754212379456,
"signal/frontier_coverage_10/group_std_mean": 0.23370930552482605,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_15/centered_abs_mean": 0.17451754212379456,
"signal/frontier_coverage_15/group_std_mean": 0.23370930552482605,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_20/centered_abs_mean": 0.17451754212379456,
"signal/frontier_coverage_20/group_std_mean": 0.23370930552482605,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_25/centered_abs_mean": 0.17451754212379456,
"signal/frontier_coverage_25/group_std_mean": 0.23370930552482605,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_5/centered_abs_mean": 0.17451754212379456,
"signal/frontier_coverage_5/group_std_mean": 0.23370930552482605,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031238638795912264,
"signal/frontier_ece_reward/centered_abs_mean": 0.04130900949239731,
"signal/frontier_ece_reward/group_std_mean": 0.05173059701919556,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005163626186549664,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005163626186549664,
"step": 65
},
{
"calibration/aurc": 0.3356664044008323,
"calibration/batch_distribution_entropy": 0.9384537311646113,
"calibration/buffer_distribution_entropy": 0.9376239395466122,
"calibration/confidence_entropy": 0.40272087778388677,
"calibration/coverage@0%": 0.03441176470588235,
"calibration/coverage@1%": 0.03441176470588235,
"calibration/coverage@10%": 0.1807077205882353,
"calibration/coverage@15%": 0.2613296568627451,
"calibration/coverage@20%": 0.310640318627451,
"calibration/coverage@25%": 0.3513939950980392,
"calibration/coverage@30%": 0.5038296568627451,
"calibration/coverage@5%": 0.0696966911764706,
"calibration/ece": 0.1596415861961429,
"calibration/mean_confidence": 0.5144321171091331,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00087890625,
"completions/max_length": 1094.8,
"completions/max_terminated_length": 759.4,
"completions/mean_length": 166.6509765625,
"completions/mean_terminated_length": 165.4495086669922,
"completions/min_length": 77.0,
"completions/min_terminated_length": 77.0,
"epoch": 0.224,
"grad_norm": 0.0021792047191411257,
"learning_rate": 1e-06,
"loss": 0.0025,
"num_tokens": 234299432.0,
"reward": 1.0031249046325683,
"reward_std": 0.10735798627138138,
"rewards/accuracy_reward": 0.55029296875,
"rewards/brier_reward": 0.7902113795280457,
"rewards/confidence_uniqueness_reward": 0.8990541458129883,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_aurc_reward": -0.003141326270997524,
"rewards/frontier_coverage_1": 0.12729544788599015,
"rewards/frontier_coverage_10": 0.12729544788599015,
"rewards/frontier_coverage_15": 0.12729544788599015,
"rewards/frontier_coverage_20": 0.12729544788599015,
"rewards/frontier_coverage_25": 0.12729544788599015,
"rewards/frontier_coverage_5": 0.12729544788599015,
"rewards/frontier_ece_reward": 0.029154983535408973,
"signal/accuracy_reward/centered_abs_mean": 0.120965576171875,
"signal/accuracy_reward/group_std_mean": 0.16113831400871276,
"signal/accuracy_reward/group_zero_std_frac": 0.53125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0604827880859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0604827880859375,
"signal/advantage_abs_mean": 0.08187931925058364,
"signal/advantage_pre_scale_abs_mean": 0.08187931925058364,
"signal/advantage_pre_scale_std": 0.13045653700828552,
"signal/advantage_std": 0.13045653700828552,
"signal/brier_reward/centered_abs_mean": 0.17681266367435455,
"signal/brier_reward/group_std_mean": 0.2258577436208725,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02210158295929432,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02210158295929432,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0674636647105217,
"signal/confidence_uniqueness_reward/group_std_mean": 0.08297923505306244,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008432958088815213,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008432958088815213,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_std_mean": 0.004971844516694546,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0036583705339580776,
"signal/frontier_aurc_reward/group_std_mean": 0.0056047579273581505,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.548483070218935e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.548483070218935e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19972024559974672,
"signal/frontier_coverage_1/group_std_mean": 0.26246256828308107,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_10/centered_abs_mean": 0.19972024559974672,
"signal/frontier_coverage_10/group_std_mean": 0.26246256828308107,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_15/centered_abs_mean": 0.19972024559974672,
"signal/frontier_coverage_15/group_std_mean": 0.26246256828308107,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_20/centered_abs_mean": 0.19972024559974672,
"signal/frontier_coverage_20/group_std_mean": 0.26246256828308107,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_25/centered_abs_mean": 0.19972024559974672,
"signal/frontier_coverage_25/group_std_mean": 0.26246256828308107,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_5/centered_abs_mean": 0.19972024559974672,
"signal/frontier_coverage_5/group_std_mean": 0.26246256828308107,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00357499229721725,
"signal/frontier_ece_reward/centered_abs_mean": 0.03642488420009613,
"signal/frontier_ece_reward/group_std_mean": 0.04521550685167312,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0045531105250120165,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0045531105250120165,
"step": 70
},
{
"calibration/aurc": 0.3522414944051539,
"calibration/batch_distribution_entropy": 0.8803572798405815,
"calibration/buffer_distribution_entropy": 0.9410783570721908,
"calibration/confidence_entropy": 0.36782506176165003,
"calibration/coverage@0%": 0.01875,
"calibration/coverage@1%": 0.01875,
"calibration/coverage@10%": 0.09140625,
"calibration/coverage@15%": 0.128125,
"calibration/coverage@20%": 0.16328125,
"calibration/coverage@25%": 0.36796875,
"calibration/coverage@30%": 0.496875,
"calibration/coverage@5%": 0.0328125,
"calibration/ece": 0.1927504647077757,
"calibration/mean_confidence": 0.5310109334577756,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1190.8,
"completions/max_terminated_length": 514.6,
"completions/mean_length": 167.90107421875,
"completions/mean_terminated_length": 167.3661346435547,
"completions/min_length": 70.2,
"completions/min_terminated_length": 70.2,
"epoch": 0.24,
"grad_norm": 0.0024263551458716393,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 251270419.0,
"reward": 1.0291972041130066,
"reward_std": 0.10552183389663697,
"rewards/accuracy_reward": 0.60859375,
"rewards/brier_reward": 0.7870316863059997,
"rewards/confidence_uniqueness_reward": 0.91382737159729,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0025115890428423883,
"rewards/frontier_coverage_1": 0.08323012106120586,
"rewards/frontier_coverage_10": 0.08323012106120586,
"rewards/frontier_coverage_15": 0.08323012106120586,
"rewards/frontier_coverage_20": 0.08323012106120586,
"rewards/frontier_coverage_25": 0.08323012106120586,
"rewards/frontier_coverage_5": 0.08323012106120586,
"rewards/frontier_ece_reward": 0.02914494350552559,
"signal/accuracy_reward/centered_abs_mean": 0.1296142578125,
"signal/accuracy_reward/group_std_mean": 0.16963129937648774,
"signal/accuracy_reward/group_zero_std_frac": 0.521875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06480712890625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06480712890625,
"signal/advantage_abs_mean": 0.08079658448696136,
"signal/advantage_pre_scale_abs_mean": 0.08079658448696136,
"signal/advantage_pre_scale_std": 0.12934576272964476,
"signal/advantage_std": 0.12934576272964476,
"signal/brier_reward/centered_abs_mean": 0.17554612457752228,
"signal/brier_reward/group_std_mean": 0.2262921988964081,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021943265572190285,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021943265572190285,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05461069941520691,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06644331142306328,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006826337426900864,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006826337426900864,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003250430291518569,
"signal/frontier_aurc_reward/group_std_mean": 0.005101799964904785,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.818270146846771e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.818270146846771e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2036992698907852,
"signal/frontier_coverage_1/group_std_mean": 0.2712432205677032,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_10/centered_abs_mean": 0.2036992698907852,
"signal/frontier_coverage_10/group_std_mean": 0.2712432205677032,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_15/centered_abs_mean": 0.2036992698907852,
"signal/frontier_coverage_15/group_std_mean": 0.2712432205677032,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_20/centered_abs_mean": 0.2036992698907852,
"signal/frontier_coverage_20/group_std_mean": 0.2712432205677032,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_25/centered_abs_mean": 0.2036992698907852,
"signal/frontier_coverage_25/group_std_mean": 0.2712432205677032,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_5/centered_abs_mean": 0.2036992698907852,
"signal/frontier_coverage_5/group_std_mean": 0.2712432205677032,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003646216681227088,
"signal/frontier_ece_reward/centered_abs_mean": 0.03291768655180931,
"signal/frontier_ece_reward/group_std_mean": 0.04102036878466606,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041147108189761635,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041147108189761635,
"step": 75
},
{
"calibration/aurc": 0.21512356164155774,
"calibration/batch_distribution_entropy": 0.8827675644974178,
"calibration/buffer_distribution_entropy": 0.9422732160404657,
"calibration/confidence_entropy": 0.36174526720048983,
"calibration/coverage@0%": 0.06015625,
"calibration/coverage@1%": 0.06015625,
"calibration/coverage@10%": 0.2484375,
"calibration/coverage@15%": 0.4230974264705882,
"calibration/coverage@20%": 0.5403370098039215,
"calibration/coverage@25%": 0.6317861519607844,
"calibration/coverage@30%": 0.7466666666666667,
"calibration/coverage@5%": 0.1515625,
"calibration/ece": 0.1830301858879185,
"calibration/mean_confidence": 0.5492079764272219,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 892.6,
"completions/max_terminated_length": 458.0,
"completions/mean_length": 164.4017578125,
"completions/mean_terminated_length": 163.9999572753906,
"completions/min_length": 75.8,
"completions/min_terminated_length": 75.8,
"epoch": 0.256,
"grad_norm": 0.0027514868415892124,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 268008709.0,
"reward": 1.0226893305778504,
"reward_std": 0.09834913462400437,
"rewards/accuracy_reward": 0.58173828125,
"rewards/brier_reward": 0.8035358548164367,
"rewards/confidence_uniqueness_reward": 0.9133859038352966,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0023928165435791017,
"rewards/frontier_coverage_1": 0.12931015118956565,
"rewards/frontier_coverage_10": 0.12931015118956565,
"rewards/frontier_coverage_15": 0.12931015118956565,
"rewards/frontier_coverage_20": 0.12931015118956565,
"rewards/frontier_coverage_25": 0.12931015118956565,
"rewards/frontier_coverage_5": 0.12931015118956565,
"rewards/frontier_ece_reward": 0.028441504389047623,
"signal/accuracy_reward/centered_abs_mean": 0.123785400390625,
"signal/accuracy_reward/group_std_mean": 0.1612432286143303,
"signal/accuracy_reward/group_zero_std_frac": 0.546875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0618927001953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0618927001953125,
"signal/advantage_abs_mean": 0.07478999271988869,
"signal/advantage_pre_scale_abs_mean": 0.07478999271988869,
"signal/advantage_pre_scale_std": 0.12347659170627594,
"signal/advantage_std": 0.12347659170627594,
"signal/brier_reward/centered_abs_mean": 0.16572422683238983,
"signal/brier_reward/group_std_mean": 0.2140843689441681,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020715528354048728,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020715528354048728,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.05513819307088852,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06670184880495071,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006892274133861065,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006892274133861065,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003011533757671714,
"signal/frontier_aurc_reward/group_std_mean": 0.0047927751205861565,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.39064516487997e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.39064516487997e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19958887100219727,
"signal/frontier_coverage_1/group_std_mean": 0.2646804749965668,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_10/centered_abs_mean": 0.19958887100219727,
"signal/frontier_coverage_10/group_std_mean": 0.2646804749965668,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_15/centered_abs_mean": 0.19958887100219727,
"signal/frontier_coverage_15/group_std_mean": 0.2646804749965668,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_20/centered_abs_mean": 0.19958887100219727,
"signal/frontier_coverage_20/group_std_mean": 0.2646804749965668,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_25/centered_abs_mean": 0.19958887100219727,
"signal/frontier_coverage_25/group_std_mean": 0.2646804749965668,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_5/centered_abs_mean": 0.19958887100219727,
"signal/frontier_coverage_5/group_std_mean": 0.2646804749965668,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035726406611502172,
"signal/frontier_ece_reward/centered_abs_mean": 0.02867573909461498,
"signal/frontier_ece_reward/group_std_mean": 0.03616860210895538,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035844673868268727,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035844673868268727,
"step": 80
},
{
"calibration/aurc": 0.21649586779760063,
"calibration/batch_distribution_entropy": 0.8773805631285727,
"calibration/buffer_distribution_entropy": 0.9423614066226536,
"calibration/confidence_entropy": 0.3884055467369814,
"calibration/coverage@0%": 0.0109375,
"calibration/coverage@1%": 0.0109375,
"calibration/coverage@10%": 0.18828125,
"calibration/coverage@15%": 0.38828125,
"calibration/coverage@20%": 0.47734375,
"calibration/coverage@25%": 0.74609375,
"calibration/coverage@30%": 0.8078125,
"calibration/coverage@5%": 0.0921875,
"calibration/ece": 0.12636070145032918,
"calibration/mean_confidence": 0.6323173064632307,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1088.2,
"completions/max_terminated_length": 434.4,
"completions/mean_length": 169.712890625,
"completions/mean_terminated_length": 169.04539794921874,
"completions/min_length": 74.4,
"completions/min_terminated_length": 74.4,
"epoch": 0.272,
"grad_norm": 0.0028915083967149258,
"learning_rate": 1e-06,
"loss": 0.0021,
"num_tokens": 284712265.0,
"reward": 1.0215874552726745,
"reward_std": 0.10492411553859711,
"rewards/accuracy_reward": 0.58447265625,
"rewards/brier_reward": 0.7885427713394165,
"rewards/confidence_uniqueness_reward": 0.9305692434310913,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.0025600562803447247,
"rewards/frontier_coverage_1": 0.11024373397231102,
"rewards/frontier_coverage_10": 0.11024373397231102,
"rewards/frontier_coverage_15": 0.11024373397231102,
"rewards/frontier_coverage_20": 0.11024373397231102,
"rewards/frontier_coverage_25": 0.11024373397231102,
"rewards/frontier_coverage_5": 0.11024373397231102,
"rewards/frontier_ece_reward": 0.024467223882675172,
"signal/accuracy_reward/centered_abs_mean": 0.121722412109375,
"signal/accuracy_reward/group_std_mean": 0.16252617239952089,
"signal/accuracy_reward/group_zero_std_frac": 0.5375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0608612060546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0608612060546875,
"signal/advantage_abs_mean": 0.07876608818769455,
"signal/advantage_pre_scale_abs_mean": 0.07876608818769455,
"signal/advantage_pre_scale_std": 0.12760126292705537,
"signal/advantage_std": 0.12760126292705537,
"signal/brier_reward/centered_abs_mean": 0.1705150604248047,
"signal/brier_reward/group_std_mean": 0.22130897045135497,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021314382553100586,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021314382553100586,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04144674874842167,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05355666503310204,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005180843593552709,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005180843593552709,
"signal/format_reward/centered_abs_mean": 0.001513671875,
"signal/format_reward/group_std_mean": 0.004419417260214687,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002936669299378991,
"signal/frontier_aurc_reward/group_std_mean": 0.0046648337505757805,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.256637814454734e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.256637814454734e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19893713295459747,
"signal/frontier_coverage_1/group_std_mean": 0.2645448505878448,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_10/centered_abs_mean": 0.19893713295459747,
"signal/frontier_coverage_10/group_std_mean": 0.2645448505878448,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_15/centered_abs_mean": 0.19893713295459747,
"signal/frontier_coverage_15/group_std_mean": 0.2645448505878448,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_20/centered_abs_mean": 0.19893713295459747,
"signal/frontier_coverage_20/group_std_mean": 0.2645448505878448,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_25/centered_abs_mean": 0.19893713295459747,
"signal/frontier_coverage_25/group_std_mean": 0.2645448505878448,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_5/centered_abs_mean": 0.19893713295459747,
"signal/frontier_coverage_5/group_std_mean": 0.2645448505878448,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035609744023531674,
"signal/frontier_ece_reward/centered_abs_mean": 0.027610136568546294,
"signal/frontier_ece_reward/group_std_mean": 0.034552381932735445,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0034512670710682867,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0034512670710682867,
"step": 85
},
{
"calibration/aurc": 0.24803686043807643,
"calibration/batch_distribution_entropy": 0.8444372092396852,
"calibration/buffer_distribution_entropy": 0.9429605942110868,
"calibration/confidence_entropy": 0.36471631140917615,
"calibration/coverage@0%": 0.0421875,
"calibration/coverage@1%": 0.0421875,
"calibration/coverage@10%": 0.2828125,
"calibration/coverage@15%": 0.32890625,
"calibration/coverage@20%": 0.4796875,
"calibration/coverage@25%": 0.546875,
"calibration/coverage@30%": 0.6203125,
"calibration/coverage@5%": 0.196875,
"calibration/ece": 0.12333755567361956,
"calibration/mean_confidence": 0.6039962674593244,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1108.2,
"completions/max_terminated_length": 514.0,
"completions/mean_length": 166.53251953125,
"completions/mean_terminated_length": 166.13165893554688,
"completions/min_length": 74.6,
"completions/min_terminated_length": 74.6,
"epoch": 0.288,
"grad_norm": 0.0030995451379567385,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 301375734.0,
"reward": 1.0154645323753357,
"reward_std": 0.10347330272197723,
"rewards/accuracy_reward": 0.5708984375,
"rewards/brier_reward": 0.7860757231712341,
"rewards/confidence_uniqueness_reward": 0.9404593467712402,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.002836257731541991,
"rewards/frontier_coverage_1": 0.1086883544921875,
"rewards/frontier_coverage_10": 0.1086883544921875,
"rewards/frontier_coverage_15": 0.1086883544921875,
"rewards/frontier_coverage_20": 0.1086883544921875,
"rewards/frontier_coverage_25": 0.1086883544921875,
"rewards/frontier_coverage_5": 0.1086883544921875,
"rewards/frontier_ece_reward": 0.022170854546129704,
"signal/accuracy_reward/centered_abs_mean": 0.1334228515625,
"signal/accuracy_reward/group_std_mean": 0.17499251067638397,
"signal/accuracy_reward/group_zero_std_frac": 0.503125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06671142578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.06671142578125,
"signal/advantage_abs_mean": 0.07898036390542984,
"signal/advantage_pre_scale_abs_mean": 0.07898036390542984,
"signal/advantage_pre_scale_std": 0.12728380411863327,
"signal/advantage_std": 0.12728380411863327,
"signal/brier_reward/centered_abs_mean": 0.16704794466495515,
"signal/brier_reward/group_std_mean": 0.21451664268970488,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020880993083119394,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020880993083119394,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.0323214516043663,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04215872809290886,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004040181450545788,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004040181450545788,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028704125434160233,
"signal/frontier_aurc_reward/group_std_mean": 0.004553637374192477,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1380382501520216e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1380382501520216e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19622489511966706,
"signal/frontier_coverage_1/group_std_mean": 0.2572601854801178,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_10/centered_abs_mean": 0.19622489511966706,
"signal/frontier_coverage_10/group_std_mean": 0.2572601854801178,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_15/centered_abs_mean": 0.19622489511966706,
"signal/frontier_coverage_15/group_std_mean": 0.2572601854801178,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_20/centered_abs_mean": 0.19622489511966706,
"signal/frontier_coverage_20/group_std_mean": 0.2572601854801178,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_25/centered_abs_mean": 0.19622489511966706,
"signal/frontier_coverage_25/group_std_mean": 0.2572601854801178,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_5/centered_abs_mean": 0.19622489511966706,
"signal/frontier_coverage_5/group_std_mean": 0.2572601854801178,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003512425487861037,
"signal/frontier_ece_reward/centered_abs_mean": 0.026541993021965027,
"signal/frontier_ece_reward/group_std_mean": 0.032760906219482425,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033177491277456284,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033177491277456284,
"step": 90
},
{
"calibration/aurc": 0.2445192555610003,
"calibration/batch_distribution_entropy": 0.8722010859854628,
"calibration/buffer_distribution_entropy": 0.9413355201822619,
"calibration/confidence_entropy": 0.3738584132253522,
"calibration/coverage@0%": 0.015625,
"calibration/coverage@1%": 0.015625,
"calibration/coverage@10%": 0.2203125,
"calibration/coverage@15%": 0.3078125,
"calibration/coverage@20%": 0.49765625,
"calibration/coverage@25%": 0.62578125,
"calibration/coverage@30%": 0.70390625,
"calibration/coverage@5%": 0.05859375,
"calibration/ece": 0.12586600898720163,
"calibration/mean_confidence": 0.5935988981383198,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 929.8,
"completions/max_terminated_length": 490.4,
"completions/mean_length": 169.30595703125,
"completions/mean_terminated_length": 169.03911743164062,
"completions/min_length": 80.8,
"completions/min_terminated_length": 80.8,
"epoch": 0.304,
"grad_norm": 0.0025018032174557447,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 318039379.0,
"reward": 1.0334564208984376,
"reward_std": 0.09357217103242874,
"rewards/accuracy_reward": 0.60791015625,
"rewards/brier_reward": 0.7965248703956604,
"rewards/confidence_uniqueness_reward": 0.9396258115768432,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0024371820967644454,
"rewards/frontier_coverage_1": 0.09061380345374345,
"rewards/frontier_coverage_10": 0.09061380345374345,
"rewards/frontier_coverage_15": 0.09061380345374345,
"rewards/frontier_coverage_20": 0.09061380345374345,
"rewards/frontier_coverage_25": 0.09061380345374345,
"rewards/frontier_coverage_5": 0.09061380345374345,
"rewards/frontier_ece_reward": 0.023134828731417655,
"signal/accuracy_reward/centered_abs_mean": 0.111419677734375,
"signal/accuracy_reward/group_std_mean": 0.14964892268180846,
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0557098388671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0557098388671875,
"signal/advantage_abs_mean": 0.07044639587402343,
"signal/advantage_pre_scale_abs_mean": 0.07044639587402343,
"signal/advantage_pre_scale_std": 0.11818494796752929,
"signal/advantage_std": 0.11818494796752929,
"signal/brier_reward/centered_abs_mean": 0.15556592047214507,
"signal/brier_reward/group_std_mean": 0.20124119520187378,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019445740059018134,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019445740059018134,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.031480921804904936,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04011792093515396,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003935115225613117,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003935115225613117,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002605660632252693,
"signal/frontier_aurc_reward/group_std_mean": 0.004119851719588041,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.664132356992923e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.664132356992923e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17928916215896606,
"signal/frontier_coverage_1/group_std_mean": 0.23793997764587402,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_10/centered_abs_mean": 0.17928916215896606,
"signal/frontier_coverage_10/group_std_mean": 0.23793997764587402,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_15/centered_abs_mean": 0.17928916215896606,
"signal/frontier_coverage_15/group_std_mean": 0.23793997764587402,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_20/centered_abs_mean": 0.17928916215896606,
"signal/frontier_coverage_20/group_std_mean": 0.23793997764587402,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_25/centered_abs_mean": 0.17928916215896606,
"signal/frontier_coverage_25/group_std_mean": 0.23793997764587402,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_5/centered_abs_mean": 0.17928916215896606,
"signal/frontier_coverage_5/group_std_mean": 0.23793997764587402,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032092759851366282,
"signal/frontier_ece_reward/centered_abs_mean": 0.02426176182925701,
"signal/frontier_ece_reward/group_std_mean": 0.030135614797472954,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030327202286571263,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030327202286571263,
"step": 95
},
{
"calibration/aurc": 0.1958580706055439,
"calibration/batch_distribution_entropy": 0.8434134033250091,
"calibration/buffer_distribution_entropy": 0.9401943668455596,
"calibration/confidence_entropy": 0.36734551027565937,
"calibration/coverage@0%": 0.032889093137254896,
"calibration/coverage@1%": 0.032889093137254896,
"calibration/coverage@10%": 0.2699050245098039,
"calibration/coverage@15%": 0.4144822303921568,
"calibration/coverage@20%": 0.6098314950980392,
"calibration/coverage@25%": 0.7403707107843138,
"calibration/coverage@30%": 0.8201439950980391,
"calibration/coverage@5%": 0.15497242647058823,
"calibration/ece": 0.1445590710969227,
"calibration/mean_confidence": 0.6401742367558774,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1306.8,
"completions/max_terminated_length": 501.4,
"completions/mean_length": 168.332421875,
"completions/mean_terminated_length": 167.664208984375,
"completions/min_length": 80.2,
"completions/min_terminated_length": 80.2,
"epoch": 0.32,
"grad_norm": 0.0026968803722411394,
"learning_rate": 1e-06,
"loss": 0.0017,
"num_tokens": 334851807.0,
"reward": 1.0260323882102966,
"reward_std": 0.08396224528551102,
"rewards/accuracy_reward": 0.585546875,
"rewards/brier_reward": 0.8071523427963256,
"rewards/confidence_uniqueness_reward": 0.9357501029968261,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.002748763840645552,
"rewards/frontier_coverage_1": 0.11854975577443838,
"rewards/frontier_coverage_10": 0.11854975577443838,
"rewards/frontier_coverage_15": 0.11854975577443838,
"rewards/frontier_coverage_20": 0.11854975577443838,
"rewards/frontier_coverage_25": 0.11854975577443838,
"rewards/frontier_coverage_5": 0.11854975577443838,
"rewards/frontier_ece_reward": 0.02365802228450775,
"signal/accuracy_reward/centered_abs_mean": 0.08590087890625,
"signal/accuracy_reward/group_std_mean": 0.1201841339468956,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042950439453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042950439453125,
"signal/advantage_abs_mean": 0.06151105165481567,
"signal/advantage_pre_scale_abs_mean": 0.06151105165481567,
"signal/advantage_pre_scale_std": 0.10997560620307922,
"signal/advantage_std": 0.10997560620307922,
"signal/brier_reward/centered_abs_mean": 0.14419465661048889,
"signal/brier_reward/group_std_mean": 0.18665907382965088,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01802433207631111,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01802433207631111,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03390970081090927,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04388536140322685,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004238712601363659,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004238712601363659,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002838291879743338,
"signal/frontier_aurc_reward/group_std_mean": 0.004497240483760834,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.080542250652797e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.080542250652797e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1544642448425293,
"signal/frontier_coverage_1/group_std_mean": 0.20439959168434144,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_10/centered_abs_mean": 0.1544642448425293,
"signal/frontier_coverage_10/group_std_mean": 0.20439959168434144,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_15/centered_abs_mean": 0.1544642448425293,
"signal/frontier_coverage_15/group_std_mean": 0.20439959168434144,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_20/centered_abs_mean": 0.1544642448425293,
"signal/frontier_coverage_20/group_std_mean": 0.20439959168434144,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_25/centered_abs_mean": 0.1544642448425293,
"signal/frontier_coverage_25/group_std_mean": 0.20439959168434144,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_5/centered_abs_mean": 0.1544642448425293,
"signal/frontier_coverage_5/group_std_mean": 0.20439959168434144,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027649099007248878,
"signal/frontier_ece_reward/centered_abs_mean": 0.02174353301525116,
"signal/frontier_ece_reward/group_std_mean": 0.02732553631067276,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002717941626906395,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002717941626906395,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.5706461595447073,
"eval_calibration/batch_distribution_entropy": 0.8726977897672046,
"eval_calibration/buffer_distribution_entropy": 0.9393171128839451,
"eval_calibration/confidence_entropy": 0.4035573482539014,
"eval_calibration/coverage@0%": 0.015625,
"eval_calibration/coverage@1%": 0.015625,
"eval_calibration/coverage@10%": 0.015625,
"eval_calibration/coverage@15%": 0.015625,
"eval_calibration/coverage@20%": 0.015625,
"eval_calibration/coverage@25%": 0.015625,
"eval_calibration/coverage@30%": 0.015625,
"eval_calibration/coverage@5%": 0.015625,
"eval_calibration/ece": 0.24265838290022357,
"eval_calibration/mean_confidence": 0.556418300597109,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 330.0,
"eval_completions/max_terminated_length": 330.0,
"eval_completions/mean_length": 171.14369201660156,
"eval_completions/mean_terminated_length": 171.14369201660156,
"eval_completions/min_length": 96.0,
"eval_completions/min_terminated_length": 96.0,
"eval_loss": 0.0,
"eval_num_tokens": 334851807.0,
"eval_reward": 0.940795511007309,
"eval_reward_std": 0.23958701640367508,
"eval_rewards/accuracy_reward": 0.423828125,
"eval_rewards/brier_reward": 0.7562253475189209,
"eval_rewards/confidence_uniqueness_reward": 0.89501953125,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.00461793364956975,
"eval_rewards/frontier_coverage_1": 0.19424864649772644,
"eval_rewards/frontier_coverage_10": 0.19424864649772644,
"eval_rewards/frontier_coverage_15": 0.19424864649772644,
"eval_rewards/frontier_coverage_20": 0.19424864649772644,
"eval_rewards/frontier_coverage_25": 0.19424864649772644,
"eval_rewards/frontier_coverage_5": 0.19424864649772644,
"eval_rewards/frontier_ece_reward": 0.013569748029112816,
"eval_runtime": 9.8344,
"eval_samples_per_second": 50.842,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4708251953125,
"eval_signal/accuracy_reward/group_std_mean": 0.4925154745578766,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23541259765625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23541259765625,
"eval_signal/advantage_abs_mean": 0.21671650558710098,
"eval_signal/advantage_pre_scale_abs_mean": 0.21671650558710098,
"eval_signal/advantage_pre_scale_std": 0.23654372990131378,
"eval_signal/advantage_std": 0.23654372990131378,
"eval_signal/brier_reward/centered_abs_mean": 0.2694649398326874,
"eval_signal/brier_reward/group_std_mean": 0.32334399223327637,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03368311747908592,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.03368311747908592,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0469818115234375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.056731242686510086,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0058727264404296875,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0058727264404296875,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005505842389538884,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009029718115925789,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.855458120000549e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.855458120000549e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3532957285642624,
"eval_signal/frontier_coverage_1/group_std_mean": 0.44448477029800415,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3532957285642624,
"eval_signal/frontier_coverage_10/group_std_mean": 0.44448477029800415,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3532957285642624,
"eval_signal/frontier_coverage_15/group_std_mean": 0.44448477029800415,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3532957285642624,
"eval_signal/frontier_coverage_20/group_std_mean": 0.44448477029800415,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.3532957285642624,
"eval_signal/frontier_coverage_25/group_std_mean": 0.44448477029800415,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3532957285642624,
"eval_signal/frontier_coverage_5/group_std_mean": 0.44448477029800415,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006323992973193526,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.033544719219207764,
"eval_signal/frontier_ece_reward/group_std_mean": 0.04166281037032604,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041930899024009705,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041930899024009705,
"eval_steps_per_second": 0.203,
"step": 100
},
{
"epoch": 0.32,
"step": 100,
"train_probe_calibration/aurc": 0.19455582700187118,
"train_probe_calibration/batch_distribution_entropy": 0.8136888433586318,
"train_probe_calibration/buffer_distribution_entropy": 0.939399714843496,
"train_probe_calibration/confidence_entropy": 0.3994395901362911,
"train_probe_calibration/coverage@0%": 0.125,
"train_probe_calibration/coverage@1%": 0.125,
"train_probe_calibration/coverage@10%": 0.296875,
"train_probe_calibration/coverage@15%": 0.5625,
"train_probe_calibration/coverage@20%": 0.609375,
"train_probe_calibration/coverage@25%": 0.84375,
"train_probe_calibration/coverage@30%": 0.953125,
"train_probe_calibration/coverage@5%": 0.125,
"train_probe_calibration/ece": 0.17050834345107496,
"train_probe_calibration/mean_confidence": 0.6579534095658197,
"train_probe_completions/clipped_ratio": 0.0,
"train_probe_completions/max_length": 360.5,
"train_probe_completions/max_terminated_length": 360.5,
"train_probe_completions/mean_length": 168.69681549072266,
"train_probe_completions/mean_terminated_length": 168.69681549072266,
"train_probe_completions/min_length": 83.5,
"train_probe_completions/min_terminated_length": 83.5,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 334851807.0,
"train_probe_reward": 1.0333038568496704,
"train_probe_reward_std": 0.23331268876791,
"train_probe_rewards/accuracy_reward": 0.61328125,
"train_probe_rewards/brier_reward": 0.8115493357181549,
"train_probe_rewards/confidence_uniqueness_reward": 0.89208984375,
"train_probe_rewards/format_reward": 1.0,
"train_probe_rewards/frontier_aurc_reward": -0.0020465875859372318,
"train_probe_rewards/frontier_coverage_1": 0.10023730993270874,
"train_probe_rewards/frontier_coverage_10": 0.10023730993270874,
"train_probe_rewards/frontier_coverage_15": 0.10023730993270874,
"train_probe_rewards/frontier_coverage_20": 0.10023730993270874,
"train_probe_rewards/frontier_coverage_25": 0.10023730993270874,
"train_probe_rewards/frontier_coverage_5": 0.10023730993270874,
"train_probe_rewards/frontier_ece_reward": 0.023835722357034683,
"train_probe_runtime": 9.3482,
"train_probe_samples_per_second": 53.486,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.462158203125,
"train_probe_signal/accuracy_reward/group_std_mean": 0.48812438547611237,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2310791015625,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2310791015625,
"train_probe_signal/advantage_abs_mean": 0.2127402350306511,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.2127402350306511,
"train_probe_signal/advantage_pre_scale_std": 0.23030224442481995,
"train_probe_signal/advantage_std": 0.23030224442481995,
"train_probe_signal/brier_reward/centered_abs_mean": 0.22354336827993393,
"train_probe_signal/brier_reward/group_std_mean": 0.28484727442264557,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02794292103499174,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.02794292103499174,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.046661376953125,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05584513582289219,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005832672119140625,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005832672119140625,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0033606411889195442,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.005513262702152133,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.0155478422529995e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.0155478422529995e-05,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30417926609516144,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.42205144464969635,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30417926609516144,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.42205144464969635,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.30417926609516144,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.42205144464969635,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.30417926609516144,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.42205144464969635,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.30417926609516144,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.42205144464969635,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30417926609516144,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.42205144464969635,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005444808630272746,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.03267330303788185,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.040300922468304634,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004084162879735231,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004084162879735231,
"train_probe_steps_per_second": 0.214
},
{
"calibration/aurc": 0.2704139956053221,
"calibration/batch_distribution_entropy": 0.9040743573140506,
"calibration/buffer_distribution_entropy": 0.9399021081106627,
"calibration/confidence_entropy": 0.40396196903365356,
"calibration/coverage@0%": 0.0171875,
"calibration/coverage@1%": 0.0171875,
"calibration/coverage@10%": 0.18515625,
"calibration/coverage@15%": 0.44453125,
"calibration/coverage@20%": 0.53671875,
"calibration/coverage@25%": 0.58203125,
"calibration/coverage@30%": 0.65078125,
"calibration/coverage@5%": 0.10859375,
"calibration/ece": 0.1906777345961955,
"calibration/mean_confidence": 0.5680050226904052,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 638.8,
"completions/max_terminated_length": 410.2,
"completions/mean_length": 168.918359375,
"completions/mean_terminated_length": 168.7849609375,
"completions/min_length": 75.2,
"completions/min_terminated_length": 75.2,
"epoch": 0.336,
"grad_norm": 0.0018242798978462815,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 351303963.0,
"reward": 1.0275990962982178,
"reward_std": 0.09201982617378235,
"rewards/accuracy_reward": 0.58984375,
"rewards/brier_reward": 0.8076816439628601,
"rewards/confidence_uniqueness_reward": 0.9373004913330079,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.002289101597853005,
"rewards/frontier_coverage_1": 0.11363897696137429,
"rewards/frontier_coverage_10": 0.11363897696137429,
"rewards/frontier_coverage_15": 0.11363897696137429,
"rewards/frontier_coverage_20": 0.11363897696137429,
"rewards/frontier_coverage_25": 0.11384689658880234,
"rewards/frontier_coverage_5": 0.11363897696137429,
"rewards/frontier_ece_reward": 0.020657552778720854,
"signal/accuracy_reward/centered_abs_mean": 0.10953369140625,
"signal/accuracy_reward/group_std_mean": 0.15012021660804747,
"signal/accuracy_reward/group_zero_std_frac": 0.55,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054766845703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.054766845703125,
"signal/advantage_abs_mean": 0.06871124505996704,
"signal/advantage_pre_scale_abs_mean": 0.06871124505996704,
"signal/advantage_pre_scale_std": 0.11801368445158004,
"signal/advantage_std": 0.11801368445158004,
"signal/brier_reward/centered_abs_mean": 0.14712486565113067,
"signal/brier_reward/group_std_mean": 0.1890992045402527,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018390608206391334,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018390608206391334,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030815805494785308,
"signal/confidence_uniqueness_reward/group_std_mean": 0.039999409765005114,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038519756868481635,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038519756868481635,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086068242787,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002245573024265468,
"signal/frontier_aurc_reward/group_std_mean": 0.0035346172749996184,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.019575717393309e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.019575717393309e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1706451177597046,
"signal/frontier_coverage_1/group_std_mean": 0.22382004261016847,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_10/centered_abs_mean": 0.1706451177597046,
"signal/frontier_coverage_10/group_std_mean": 0.22382004261016847,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_15/centered_abs_mean": 0.1706451177597046,
"signal/frontier_coverage_15/group_std_mean": 0.22382004261016847,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_20/centered_abs_mean": 0.1706451177597046,
"signal/frontier_coverage_20/group_std_mean": 0.22382004261016847,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_25/centered_abs_mean": 0.16888906955718994,
"signal/frontier_coverage_25/group_std_mean": 0.22164588570594787,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030231142416596414,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030231142416596414,
"signal/frontier_coverage_5/centered_abs_mean": 0.1706451177597046,
"signal/frontier_coverage_5/group_std_mean": 0.22382004261016847,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030545474495738746,
"signal/frontier_ece_reward/centered_abs_mean": 0.02038377448916435,
"signal/frontier_ece_reward/group_std_mean": 0.025580647960305215,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002547971811145544,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002547971811145544,
"step": 105
},
{
"calibration/aurc": 0.24142044817482367,
"calibration/batch_distribution_entropy": 0.8656001475514318,
"calibration/buffer_distribution_entropy": 0.9409112708199789,
"calibration/confidence_entropy": 0.3802200750041355,
"calibration/coverage@0%": 0.04068321078431373,
"calibration/coverage@1%": 0.04068321078431373,
"calibration/coverage@10%": 0.2774356617647059,
"calibration/coverage@15%": 0.40088541666666666,
"calibration/coverage@20%": 0.49231617647058823,
"calibration/coverage@25%": 0.6063909313725491,
"calibration/coverage@30%": 0.6743841911764706,
"calibration/coverage@5%": 0.17272365196078432,
"calibration/ece": 0.08609408255888487,
"calibration/mean_confidence": 0.5179847446127612,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 821.2,
"completions/max_terminated_length": 582.6,
"completions/mean_length": 169.8603515625,
"completions/mean_terminated_length": 169.72701416015624,
"completions/min_length": 78.2,
"completions/min_terminated_length": 78.2,
"epoch": 0.352,
"grad_norm": 0.002221801085397601,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 368303749.0,
"reward": 0.9986960291862488,
"reward_std": 0.0819695919752121,
"rewards/accuracy_reward": 0.52705078125,
"rewards/brier_reward": 0.7985443472862244,
"rewards/confidence_uniqueness_reward": 0.9362337708473205,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0025407387875020504,
"rewards/frontier_coverage_1": 0.15299645960330963,
"rewards/frontier_coverage_10": 0.15299645960330963,
"rewards/frontier_coverage_15": 0.15299645960330963,
"rewards/frontier_coverage_20": 0.15299645960330963,
"rewards/frontier_coverage_25": 0.14608888924121857,
"rewards/frontier_coverage_5": 0.15299645960330963,
"rewards/frontier_ece_reward": 0.017266629636287688,
"signal/accuracy_reward/centered_abs_mean": 0.095184326171875,
"signal/accuracy_reward/group_std_mean": 0.1254624456167221,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0475921630859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0475921630859375,
"signal/advantage_abs_mean": 0.06207955777645111,
"signal/advantage_pre_scale_abs_mean": 0.06207955777645111,
"signal/advantage_pre_scale_std": 0.10653006732463836,
"signal/advantage_std": 0.10653006732463836,
"signal/brier_reward/centered_abs_mean": 0.1445058435201645,
"signal/brier_reward/group_std_mean": 0.1861796945333481,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01806323044002056,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01806323044002056,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030342183634638788,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0386995404958725,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037927729543298485,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037927729543298485,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001989635010249913,
"signal/frontier_aurc_reward/group_std_mean": 0.0031029653735458853,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.561446574167349e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.561446574167349e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17928497791290282,
"signal/frontier_coverage_1/group_std_mean": 0.23090406954288484,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_10/centered_abs_mean": 0.17928497791290282,
"signal/frontier_coverage_10/group_std_mean": 0.23090406954288484,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_15/centered_abs_mean": 0.17928497791290282,
"signal/frontier_coverage_15/group_std_mean": 0.23090406954288484,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_20/centered_abs_mean": 0.17928497791290282,
"signal/frontier_coverage_20/group_std_mean": 0.23090406954288484,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_25/centered_abs_mean": 0.17467791438102723,
"signal/frontier_coverage_25/group_std_mean": 0.22531512677669524,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031267345417290925,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031267345417290925,
"signal/frontier_coverage_5/centered_abs_mean": 0.17928497791290282,
"signal/frontier_coverage_5/group_std_mean": 0.23090406954288484,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032092009671032427,
"signal/frontier_ece_reward/centered_abs_mean": 0.018327732756733894,
"signal/frontier_ece_reward/group_std_mean": 0.02301064059138298,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022909665945917367,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022909665945917367,
"step": 110
},
{
"calibration/aurc": 0.25086900973025095,
"calibration/batch_distribution_entropy": 0.9186404347150997,
"calibration/buffer_distribution_entropy": 0.9416779594893804,
"calibration/confidence_entropy": 0.4035477266164838,
"calibration/coverage@0%": 0.03438112745098039,
"calibration/coverage@1%": 0.03438112745098039,
"calibration/coverage@10%": 0.1820373774509804,
"calibration/coverage@15%": 0.21797487745098038,
"calibration/coverage@20%": 0.4117984068627451,
"calibration/coverage@25%": 0.5314705882352941,
"calibration/coverage@30%": 0.6292800245098039,
"calibration/coverage@5%": 0.16172487745098038,
"calibration/ece": 0.15323516031703535,
"calibration/mean_confidence": 0.5679697731010848,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 904.2,
"completions/max_terminated_length": 493.4,
"completions/mean_length": 169.8033203125,
"completions/mean_terminated_length": 169.5369659423828,
"completions/min_length": 76.8,
"completions/min_terminated_length": 76.8,
"epoch": 0.368,
"grad_norm": 0.0022946952376514673,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 385108007.0,
"reward": 1.0292543292045593,
"reward_std": 0.0754001870751381,
"rewards/accuracy_reward": 0.58623046875,
"rewards/brier_reward": 0.8222095847129822,
"rewards/confidence_uniqueness_reward": 0.9371579766273499,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0019215317443013191,
"rewards/frontier_coverage_1": 0.13132742196321487,
"rewards/frontier_coverage_10": 0.13132742196321487,
"rewards/frontier_coverage_15": 0.13132742196321487,
"rewards/frontier_coverage_20": 0.13132742196321487,
"rewards/frontier_coverage_25": 0.12279371917247772,
"rewards/frontier_coverage_5": 0.13132742196321487,
"rewards/frontier_ece_reward": 0.01957782618701458,
"signal/accuracy_reward/centered_abs_mean": 0.084307861328125,
"signal/accuracy_reward/group_std_mean": 0.11866024732589722,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421539306640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0421539306640625,
"signal/advantage_abs_mean": 0.05573421791195869,
"signal/advantage_pre_scale_abs_mean": 0.05573421791195869,
"signal/advantage_pre_scale_std": 0.09996391981840133,
"signal/advantage_std": 0.09996391981840133,
"signal/brier_reward/centered_abs_mean": 0.131490296125412,
"signal/brier_reward/group_std_mean": 0.17088458240032195,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0164362870156765,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0164362870156765,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02735428810119629,
"signal/confidence_uniqueness_reward/group_std_mean": 0.034791599959135056,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003419286012649536,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003419286012649536,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017890902236104012,
"signal/frontier_aurc_reward/group_std_mean": 0.00294273984618485,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2024714892031624e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2024714892031624e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16438928842544556,
"signal/frontier_coverage_1/group_std_mean": 0.21578606963157654,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_10/centered_abs_mean": 0.16438928842544556,
"signal/frontier_coverage_10/group_std_mean": 0.21578606963157654,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_15/centered_abs_mean": 0.16438928842544556,
"signal/frontier_coverage_15/group_std_mean": 0.21578606963157654,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_20/centered_abs_mean": 0.16438928842544556,
"signal/frontier_coverage_20/group_std_mean": 0.21578606963157654,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_25/centered_abs_mean": 0.15546331703662872,
"signal/frontier_coverage_25/group_std_mean": 0.2039874643087387,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027827932965010403,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027827932965010403,
"signal/frontier_coverage_5/centered_abs_mean": 0.16438928842544556,
"signal/frontier_coverage_5/group_std_mean": 0.21578606963157654,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002942568203434348,
"signal/frontier_ece_reward/centered_abs_mean": 0.01668607220053673,
"signal/frontier_ece_reward/group_std_mean": 0.02099420689046383,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002085759025067091,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002085759025067091,
"step": 115
},
{
"calibration/aurc": 0.2703069251334617,
"calibration/batch_distribution_entropy": 0.8990586414219536,
"calibration/buffer_distribution_entropy": 0.9422862701184694,
"calibration/confidence_entropy": 0.4024924860193864,
"calibration/coverage@0%": 0.04765625,
"calibration/coverage@1%": 0.04765625,
"calibration/coverage@10%": 0.38125,
"calibration/coverage@15%": 0.425,
"calibration/coverage@20%": 0.4703125,
"calibration/coverage@25%": 0.515625,
"calibration/coverage@30%": 0.55546875,
"calibration/coverage@5%": 0.23828125,
"calibration/ece": 0.1594206011865718,
"calibration/mean_confidence": 0.5080374241118275,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 729.8,
"completions/max_terminated_length": 581.2,
"completions/mean_length": 171.50654296875,
"completions/mean_terminated_length": 171.239794921875,
"completions/min_length": 76.6,
"completions/min_terminated_length": 76.6,
"epoch": 0.384,
"grad_norm": 0.0017057686345651746,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 401720746.0,
"reward": 1.030127477645874,
"reward_std": 0.08005195558071136,
"rewards/accuracy_reward": 0.5873046875,
"rewards/brier_reward": 0.8276395082473755,
"rewards/confidence_uniqueness_reward": 0.9362314462661743,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.001922252168878913,
"rewards/frontier_coverage_1": 0.13151057362556456,
"rewards/frontier_coverage_10": 0.13151057362556456,
"rewards/frontier_coverage_15": 0.13151057362556456,
"rewards/frontier_coverage_20": 0.13151057362556456,
"rewards/frontier_coverage_25": 0.11768633276224136,
"rewards/frontier_coverage_5": 0.13151057362556456,
"rewards/frontier_ece_reward": 0.019144237600266935,
"signal/accuracy_reward/centered_abs_mean": 0.09979248046875,
"signal/accuracy_reward/group_std_mean": 0.13565291166305543,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049896240234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049896240234375,
"signal/advantage_abs_mean": 0.058755910396575926,
"signal/advantage_pre_scale_abs_mean": 0.058755910396575926,
"signal/advantage_pre_scale_std": 0.10700914263725281,
"signal/advantage_std": 0.10700914263725281,
"signal/brier_reward/centered_abs_mean": 0.12325199693441391,
"signal/brier_reward/group_std_mean": 0.16149061620235444,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015406499616801739,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015406499616801739,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027864859998226167,
"signal/confidence_uniqueness_reward/group_std_mean": 0.036340619623661044,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003483107499778271,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003483107499778271,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017306852154433728,
"signal/frontier_aurc_reward/group_std_mean": 0.002775628166273236,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.097926237387583e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.097926237387583e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15827414095401765,
"signal/frontier_coverage_1/group_std_mean": 0.20665526986122132,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_10/centered_abs_mean": 0.15827414095401765,
"signal/frontier_coverage_10/group_std_mean": 0.20665526986122132,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_15/centered_abs_mean": 0.15827414095401765,
"signal/frontier_coverage_15/group_std_mean": 0.20665526986122132,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_20/centered_abs_mean": 0.15827414095401765,
"signal/frontier_coverage_20/group_std_mean": 0.20665526986122132,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_25/centered_abs_mean": 0.13856834620237352,
"signal/frontier_coverage_25/group_std_mean": 0.1815927118062973,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024803733453154565,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024803733453154565,
"signal/frontier_coverage_5/centered_abs_mean": 0.15827414095401765,
"signal/frontier_coverage_5/group_std_mean": 0.20665526986122132,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002833107067272067,
"signal/frontier_ece_reward/centered_abs_mean": 0.015285241603851318,
"signal/frontier_ece_reward/group_std_mean": 0.019187380746006965,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019106552004814147,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019106552004814147,
"step": 120
},
{
"calibration/aurc": 0.29008772776734093,
"calibration/batch_distribution_entropy": 0.9157935092331382,
"calibration/buffer_distribution_entropy": 0.9443153619361073,
"calibration/confidence_entropy": 0.4131123080990579,
"calibration/coverage@0%": 0.017981004901960786,
"calibration/coverage@1%": 0.017981004901960786,
"calibration/coverage@10%": 0.043762254901960784,
"calibration/coverage@15%": 0.14454350490196077,
"calibration/coverage@20%": 0.28598345588235297,
"calibration/coverage@25%": 0.46881740196078436,
"calibration/coverage@30%": 0.6245557598039216,
"calibration/coverage@5%": 0.017981004901960786,
"calibration/ece": 0.16912397182889852,
"calibration/mean_confidence": 0.5452109849824474,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 633.8,
"completions/max_terminated_length": 433.0,
"completions/mean_length": 174.31162109375,
"completions/mean_terminated_length": 174.17910766601562,
"completions/min_length": 85.2,
"completions/min_terminated_length": 85.2,
"epoch": 0.4,
"grad_norm": 0.0025587843265384436,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 418542145.0,
"reward": 1.0214404821395875,
"reward_std": 0.084488844871521,
"rewards/accuracy_reward": 0.583984375,
"rewards/brier_reward": 0.7968339323997498,
"rewards/confidence_uniqueness_reward": 0.936758029460907,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0024314658250659702,
"rewards/frontier_coverage_1": 0.10493959616869689,
"rewards/frontier_coverage_10": 0.10493959616869689,
"rewards/frontier_coverage_15": 0.10493959616869689,
"rewards/frontier_coverage_20": 0.10493959616869689,
"rewards/frontier_coverage_25": 0.0941769102588296,
"rewards/frontier_coverage_5": 0.10493959616869689,
"rewards/frontier_ece_reward": 0.01528221946209669,
"signal/accuracy_reward/centered_abs_mean": 0.103076171875,
"signal/accuracy_reward/group_std_mean": 0.14248399436473846,
"signal/accuracy_reward/group_zero_std_frac": 0.56875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515380859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0515380859375,
"signal/advantage_abs_mean": 0.06206804737448692,
"signal/advantage_pre_scale_abs_mean": 0.06206804737448692,
"signal/advantage_pre_scale_std": 0.11141373813152314,
"signal/advantage_std": 0.11141373813152314,
"signal/brier_reward/centered_abs_mean": 0.1348109632730484,
"signal/brier_reward/group_std_mean": 0.1749127984046936,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01685137040913105,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01685137040913105,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02733922004699707,
"signal/confidence_uniqueness_reward/group_std_mean": 0.035421935841441154,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003417402505874634,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003417402505874634,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086068242787,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002029798785224557,
"signal/frontier_aurc_reward/group_std_mean": 0.0032225903123617172,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.633339802036062e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.633339802036062e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15750395655632018,
"signal/frontier_coverage_1/group_std_mean": 0.2061130702495575,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_10/centered_abs_mean": 0.15750395655632018,
"signal/frontier_coverage_10/group_std_mean": 0.2061130702495575,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_15/centered_abs_mean": 0.15750395655632018,
"signal/frontier_coverage_15/group_std_mean": 0.2061130702495575,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_20/centered_abs_mean": 0.15750395655632018,
"signal/frontier_coverage_20/group_std_mean": 0.2061130702495575,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_25/centered_abs_mean": 0.12791687697172166,
"signal/frontier_coverage_25/group_std_mean": 0.16793505549430848,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022897121030837297,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022897121030837297,
"signal/frontier_coverage_5/centered_abs_mean": 0.15750395655632018,
"signal/frontier_coverage_5/group_std_mean": 0.2061130702495575,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002819320699200034,
"signal/frontier_ece_reward/centered_abs_mean": 0.015673490427434444,
"signal/frontier_ece_reward/group_std_mean": 0.01953093260526657,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019591863034293055,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019591863034293055,
"step": 125
},
{
"calibration/aurc": 0.300496468657274,
"calibration/batch_distribution_entropy": 0.860343677952838,
"calibration/buffer_distribution_entropy": 0.9456375276771343,
"calibration/confidence_entropy": 0.4292367542687396,
"calibration/coverage@0%": 0.04453125,
"calibration/coverage@1%": 0.04453125,
"calibration/coverage@10%": 0.3421875,
"calibration/coverage@15%": 0.47421875,
"calibration/coverage@20%": 0.56484375,
"calibration/coverage@25%": 0.61171875,
"calibration/coverage@30%": 0.61875,
"calibration/coverage@5%": 0.13828125,
"calibration/ece": 0.1725572010194605,
"calibration/mean_confidence": 0.5447724534313727,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 661.0,
"completions/max_terminated_length": 436.8,
"completions/mean_length": 177.3251953125,
"completions/mean_terminated_length": 177.19320983886718,
"completions/min_length": 80.6,
"completions/min_terminated_length": 80.6,
"epoch": 0.416,
"grad_norm": 0.0018295373301953077,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 435239139.0,
"reward": 1.0142476677894592,
"reward_std": 0.07882001847028733,
"rewards/accuracy_reward": 0.5599609375,
"rewards/brier_reward": 0.8115284204483032,
"rewards/confidence_uniqueness_reward": 0.9416091442108154,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.001915666786953807,
"rewards/frontier_coverage_1": 0.12817499786615372,
"rewards/frontier_coverage_10": 0.12817499786615372,
"rewards/frontier_coverage_15": 0.12817499786615372,
"rewards/frontier_coverage_20": 0.12817499786615372,
"rewards/frontier_coverage_25": 0.11044178158044815,
"rewards/frontier_coverage_5": 0.12817499786615372,
"rewards/frontier_ece_reward": 0.014857827685773373,
"signal/accuracy_reward/centered_abs_mean": 0.0997314453125,
"signal/accuracy_reward/group_std_mean": 0.13128983080387116,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04986572265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04986572265625,
"signal/advantage_abs_mean": 0.05995083674788475,
"signal/advantage_pre_scale_abs_mean": 0.05995083674788475,
"signal/advantage_pre_scale_std": 0.10547690689563752,
"signal/advantage_std": 0.10547690689563752,
"signal/brier_reward/centered_abs_mean": 0.12911611646413804,
"signal/brier_reward/group_std_mean": 0.16568702459335327,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016139514558017255,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016139514558017255,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02463034950196743,
"signal/confidence_uniqueness_reward/group_std_mean": 0.031486156210303304,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030787936877459286,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030787936877459286,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014531841035932303,
"signal/frontier_aurc_reward/group_std_mean": 0.002290627988986671,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6011993395513854e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6011993395513854e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17003713846206664,
"signal/frontier_coverage_1/group_std_mean": 0.21856652796268464,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_10/centered_abs_mean": 0.17003713846206664,
"signal/frontier_coverage_10/group_std_mean": 0.21856652796268464,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_15/centered_abs_mean": 0.17003713846206664,
"signal/frontier_coverage_15/group_std_mean": 0.21856652796268464,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_20/centered_abs_mean": 0.17003713846206664,
"signal/frontier_coverage_20/group_std_mean": 0.21856652796268464,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_25/centered_abs_mean": 0.1356060341000557,
"signal/frontier_coverage_25/group_std_mean": 0.17483413219451904,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002427347889170051,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002427347889170051,
"signal/frontier_coverage_5/centered_abs_mean": 0.17003713846206664,
"signal/frontier_coverage_5/group_std_mean": 0.21856652796268464,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030436647590249776,
"signal/frontier_ece_reward/centered_abs_mean": 0.013920800760388374,
"signal/frontier_ece_reward/group_std_mean": 0.017522389814257622,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017401000950485468,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017401000950485468,
"step": 130
},
{
"calibration/aurc": 0.1721532756826449,
"calibration/batch_distribution_entropy": 0.8766754582035976,
"calibration/buffer_distribution_entropy": 0.9459665615870213,
"calibration/confidence_entropy": 0.3974590959603793,
"calibration/coverage@0%": 0.10237132352941176,
"calibration/coverage@1%": 0.16409007352941177,
"calibration/coverage@10%": 0.5047549019607842,
"calibration/coverage@15%": 0.5892555147058823,
"calibration/coverage@20%": 0.6220955882352941,
"calibration/coverage@25%": 0.6886182598039217,
"calibration/coverage@30%": 0.7379197303921569,
"calibration/coverage@5%": 0.44065257352941173,
"calibration/ece": 0.15323011827988556,
"calibration/mean_confidence": 0.6029602178207593,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 753.0,
"completions/max_terminated_length": 553.2,
"completions/mean_length": 176.35029296875,
"completions/mean_terminated_length": 176.21759643554688,
"completions/min_length": 84.8,
"completions/min_terminated_length": 84.8,
"epoch": 0.432,
"grad_norm": 0.0018569445237517357,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 452059302.0,
"reward": 1.0361051321029664,
"reward_std": 0.07464597374200821,
"rewards/accuracy_reward": 0.598828125,
"rewards/brier_reward": 0.8299178600311279,
"rewards/confidence_uniqueness_reward": 0.9411059260368347,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.001714168442413211,
"rewards/frontier_coverage_1": 0.12789682820439338,
"rewards/frontier_coverage_10": 0.12789682820439338,
"rewards/frontier_coverage_15": 0.12789682820439338,
"rewards/frontier_coverage_20": 0.12789682820439338,
"rewards/frontier_coverage_25": 0.1091009445488453,
"rewards/frontier_coverage_5": 0.12789682820439338,
"rewards/frontier_ece_reward": 0.016333967633545398,
"signal/accuracy_reward/centered_abs_mean": 0.09854736328125,
"signal/accuracy_reward/group_std_mean": 0.12864942103624344,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049273681640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049273681640625,
"signal/advantage_abs_mean": 0.05766047313809395,
"signal/advantage_pre_scale_abs_mean": 0.05766047313809395,
"signal/advantage_pre_scale_std": 0.10433387905359268,
"signal/advantage_std": 0.10433387905359268,
"signal/brier_reward/centered_abs_mean": 0.11633200347423553,
"signal/brier_reward/group_std_mean": 0.14996844828128814,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014541500434279441,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014541500434279441,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025572020933032034,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03261452466249466,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031965026166290043,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031965026166290043,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014799919212237001,
"signal/frontier_aurc_reward/group_std_mean": 0.0023754774127155544,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6491854441701435e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6491854441701435e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15234991312026977,
"signal/frontier_coverage_1/group_std_mean": 0.19905296862125396,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_10/centered_abs_mean": 0.15234991312026977,
"signal/frontier_coverage_10/group_std_mean": 0.19905296862125396,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_15/centered_abs_mean": 0.15234991312026977,
"signal/frontier_coverage_15/group_std_mean": 0.19905296862125396,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_20/centered_abs_mean": 0.15234991312026977,
"signal/frontier_coverage_20/group_std_mean": 0.19905296862125396,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_25/centered_abs_mean": 0.1211901381611824,
"signal/frontier_coverage_25/group_std_mean": 0.15898216962814332,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021693034097552298,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021693034097552298,
"signal/frontier_coverage_5/centered_abs_mean": 0.15234991312026977,
"signal/frontier_coverage_5/group_std_mean": 0.19905296862125396,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002727063372731209,
"signal/frontier_ece_reward/centered_abs_mean": 0.012844923511147499,
"signal/frontier_ece_reward/group_std_mean": 0.01613148283213377,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016056154388934373,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016056154388934373,
"step": 135
},
{
"calibration/aurc": 0.25730294087394334,
"calibration/batch_distribution_entropy": 0.8953334895238516,
"calibration/buffer_distribution_entropy": 0.9452347981622419,
"calibration/confidence_entropy": 0.40368902257402794,
"calibration/coverage@0%": 0.00625,
"calibration/coverage@1%": 0.00625,
"calibration/coverage@10%": 0.2546875,
"calibration/coverage@15%": 0.32578125,
"calibration/coverage@20%": 0.4125,
"calibration/coverage@25%": 0.4640625,
"calibration/coverage@30%": 0.67890625,
"calibration/coverage@5%": 0.02109375,
"calibration/ece": 0.1689719891195878,
"calibration/mean_confidence": 0.6208841497585194,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 462.2,
"completions/max_terminated_length": 462.2,
"completions/mean_length": 182.95234375,
"completions/mean_terminated_length": 182.95234375,
"completions/min_length": 85.8,
"completions/min_terminated_length": 85.8,
"epoch": 0.448,
"grad_norm": 0.0040541719645261765,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 468885534.0,
"reward": 1.0192960262298585,
"reward_std": 0.07691188901662827,
"rewards/accuracy_reward": 0.56416015625,
"rewards/brier_reward": 0.8227449178695678,
"rewards/confidence_uniqueness_reward": 0.9410862445831298,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0022552535170689224,
"rewards/frontier_coverage_1": 0.14381106197834015,
"rewards/frontier_coverage_10": 0.14381106197834015,
"rewards/frontier_coverage_15": 0.14381106197834015,
"rewards/frontier_coverage_20": 0.14381106197834015,
"rewards/frontier_coverage_25": 0.12079337984323502,
"rewards/frontier_coverage_5": 0.14381106197834015,
"rewards/frontier_ece_reward": 0.014734631776809693,
"signal/accuracy_reward/centered_abs_mean": 0.091912841796875,
"signal/accuracy_reward/group_std_mean": 0.12225985080003739,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459564208984375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0459564208984375,
"signal/advantage_abs_mean": 0.057493841648101805,
"signal/advantage_pre_scale_abs_mean": 0.057493841648101805,
"signal/advantage_pre_scale_std": 0.10439873188734054,
"signal/advantage_std": 0.10439873188734054,
"signal/brier_reward/centered_abs_mean": 0.1224316492676735,
"signal/brier_reward/group_std_mean": 0.1602381944656372,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015303956158459187,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015303956158459187,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026432880386710166,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03415291607379913,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033041100483387708,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033041100483387708,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001860675076022744,
"signal/frontier_aurc_reward/group_std_mean": 0.003051386307924986,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.330608233227394e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.330608233227394e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15158057063817978,
"signal/frontier_coverage_1/group_std_mean": 0.19816445112228392,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_10/centered_abs_mean": 0.15158057063817978,
"signal/frontier_coverage_10/group_std_mean": 0.19816445112228392,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_15/centered_abs_mean": 0.15158057063817978,
"signal/frontier_coverage_15/group_std_mean": 0.19816445112228392,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_20/centered_abs_mean": 0.15158057063817978,
"signal/frontier_coverage_20/group_std_mean": 0.19816445112228392,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_25/centered_abs_mean": 0.11687376201152802,
"signal/frontier_coverage_25/group_std_mean": 0.15403735041618347,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020920401671901344,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020920401671901344,
"signal/frontier_coverage_5/centered_abs_mean": 0.15158057063817978,
"signal/frontier_coverage_5/group_std_mean": 0.19816445112228392,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002713292092084885,
"signal/frontier_ece_reward/centered_abs_mean": 0.013030365109443665,
"signal/frontier_ece_reward/group_std_mean": 0.01642268504947424,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001628795638680458,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001628795638680458,
"step": 140
},
{
"calibration/aurc": 0.31054807919449445,
"calibration/batch_distribution_entropy": 0.8982943819177818,
"calibration/buffer_distribution_entropy": 0.9453990493361039,
"calibration/confidence_entropy": 0.4392308925461128,
"calibration/coverage@0%": 0.010159313725490195,
"calibration/coverage@1%": 0.010159313725490195,
"calibration/coverage@10%": 0.11797181372549019,
"calibration/coverage@15%": 0.1523468137254902,
"calibration/coverage@20%": 0.2476593137254902,
"calibration/coverage@25%": 0.2921905637254902,
"calibration/coverage@30%": 0.5638878676470588,
"calibration/coverage@5%": 0.010159313725490195,
"calibration/ece": 0.16542806795759996,
"calibration/mean_confidence": 0.6450125473590109,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1155.6,
"completions/max_terminated_length": 662.2,
"completions/mean_length": 187.3685546875,
"completions/mean_terminated_length": 186.973779296875,
"completions/min_length": 88.8,
"completions/min_terminated_length": 88.8,
"epoch": 0.464,
"grad_norm": 0.0020874959882348776,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 485975004.0,
"reward": 0.9936848402023315,
"reward_std": 0.07820483893156052,
"rewards/accuracy_reward": 0.5205078125,
"rewards/brier_reward": 0.7968811154365539,
"rewards/confidence_uniqueness_reward": 0.934678053855896,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.0026155672036111354,
"rewards/frontier_coverage_1": 0.15401808321475982,
"rewards/frontier_coverage_10": 0.15401808321475982,
"rewards/frontier_coverage_15": 0.15401808321475982,
"rewards/frontier_coverage_20": 0.15401808321475982,
"rewards/frontier_coverage_25": 0.12326906770467758,
"rewards/frontier_coverage_5": 0.15401808321475982,
"rewards/frontier_ece_reward": 0.011458772234618664,
"signal/accuracy_reward/centered_abs_mean": 0.084423828125,
"signal/accuracy_reward/group_std_mean": 0.11666271984577178,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422119140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422119140625,
"signal/advantage_abs_mean": 0.05724867507815361,
"signal/advantage_pre_scale_abs_mean": 0.05724867507815361,
"signal/advantage_pre_scale_std": 0.10630969554185868,
"signal/advantage_std": 0.10630969554185868,
"signal/brier_reward/centered_abs_mean": 0.12861161679029465,
"signal/brier_reward/group_std_mean": 0.16630764305591583,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01607645209878683,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01607645209878683,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028835254535079003,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03829977139830589,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036044068168848754,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036044068168848754,
"signal/format_reward/centered_abs_mean": 0.001513671875,
"signal/format_reward/group_std_mean": 0.004419417260214687,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0019000403117388487,
"signal/frontier_aurc_reward/group_std_mean": 0.0030668860767036677,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.40107213560259e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.40107213560259e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14896406829357148,
"signal/frontier_coverage_1/group_std_mean": 0.19503563046455383,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_10/centered_abs_mean": 0.14896406829357148,
"signal/frontier_coverage_10/group_std_mean": 0.19503563046455383,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_15/centered_abs_mean": 0.14896406829357148,
"signal/frontier_coverage_15/group_std_mean": 0.19503563046455383,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_20/centered_abs_mean": 0.14896406829357148,
"signal/frontier_coverage_20/group_std_mean": 0.19503563046455383,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_25/centered_abs_mean": 0.11530720740556717,
"signal/frontier_coverage_25/group_std_mean": 0.15157280564308168,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020639989525079727,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020639989525079727,
"signal/frontier_coverage_5/centered_abs_mean": 0.14896406829357148,
"signal/frontier_coverage_5/group_std_mean": 0.19503563046455383,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002666456811130047,
"signal/frontier_ece_reward/centered_abs_mean": 0.012446103803813457,
"signal/frontier_ece_reward/group_std_mean": 0.015705187618732453,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001555762975476682,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001555762975476682,
"step": 145
},
{
"calibration/aurc": 0.24557281662898522,
"calibration/batch_distribution_entropy": 0.8341399106383033,
"calibration/buffer_distribution_entropy": 0.9449403760979717,
"calibration/confidence_entropy": 0.3687400847573974,
"calibration/coverage@0%": 0.065625,
"calibration/coverage@1%": 0.065625,
"calibration/coverage@10%": 0.22734375,
"calibration/coverage@15%": 0.3375,
"calibration/coverage@20%": 0.446875,
"calibration/coverage@25%": 0.56953125,
"calibration/coverage@30%": 0.74375,
"calibration/coverage@5%": 0.14296875,
"calibration/ece": 0.11169667756470028,
"calibration/mean_confidence": 0.5292462022999398,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 911.8,
"completions/max_terminated_length": 479.0,
"completions/mean_length": 183.63916015625,
"completions/mean_terminated_length": 183.241748046875,
"completions/min_length": 88.6,
"completions/min_terminated_length": 88.6,
"epoch": 0.48,
"grad_norm": 0.0024119976442307234,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 502903501.0,
"reward": 1.0209609508514403,
"reward_std": 0.08060411512851715,
"rewards/accuracy_reward": 0.57626953125,
"rewards/brier_reward": 0.8086855053901673,
"rewards/confidence_uniqueness_reward": 0.9317057371139527,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0018240779172629118,
"rewards/frontier_coverage_1": 0.1329729899764061,
"rewards/frontier_coverage_10": 0.1329729899764061,
"rewards/frontier_coverage_15": 0.1329729899764061,
"rewards/frontier_coverage_20": 0.1329729899764061,
"rewards/frontier_coverage_25": 0.10789064913988114,
"rewards/frontier_coverage_5": 0.1329729899764061,
"rewards/frontier_ece_reward": 0.012992727011442185,
"signal/accuracy_reward/centered_abs_mean": 0.114581298828125,
"signal/accuracy_reward/group_std_mean": 0.1539652705192566,
"signal/accuracy_reward/group_zero_std_frac": 0.55,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0572906494140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0572906494140625,
"signal/advantage_abs_mean": 0.060080311447381976,
"signal/advantage_pre_scale_abs_mean": 0.060080311447381976,
"signal/advantage_pre_scale_std": 0.10676742047071457,
"signal/advantage_std": 0.10676742047071457,
"signal/brier_reward/centered_abs_mean": 0.13224513232707977,
"signal/brier_reward/group_std_mean": 0.16912654638290406,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01653064154088497,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01653064154088497,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030292441695928575,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0388321079313755,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003786555211991072,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003786555211991072,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013736919732764362,
"signal/frontier_aurc_reward/group_std_mean": 0.0021870420314371586,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.458908493281342e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.458908493281342e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18107914328575134,
"signal/frontier_coverage_1/group_std_mean": 0.23383658230304719,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_10/centered_abs_mean": 0.18107914328575134,
"signal/frontier_coverage_10/group_std_mean": 0.23383658230304719,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_15/centered_abs_mean": 0.18107914328575134,
"signal/frontier_coverage_15/group_std_mean": 0.23383658230304719,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_20/centered_abs_mean": 0.18107914328575134,
"signal/frontier_coverage_20/group_std_mean": 0.23383658230304719,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_25/centered_abs_mean": 0.13092263638973237,
"signal/frontier_coverage_25/group_std_mean": 0.170234015583992,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002343515120446682,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002343515120446682,
"signal/frontier_coverage_5/centered_abs_mean": 0.18107914328575134,
"signal/frontier_coverage_5/group_std_mean": 0.23383658230304719,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003241316508501768,
"signal/frontier_ece_reward/centered_abs_mean": 0.012281083315610886,
"signal/frontier_ece_reward/group_std_mean": 0.015359072759747506,
"signal/frontier_ece_reward/group_zero_std_frac": 0.00625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015351354144513608,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015351354144513608,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.406549383364957,
"eval_calibration/batch_distribution_entropy": 0.8909663454672565,
"eval_calibration/buffer_distribution_entropy": 0.9446340496301424,
"eval_calibration/confidence_entropy": 0.46453067903100087,
"eval_calibration/coverage@0%": 0.078125,
"eval_calibration/coverage@1%": 0.078125,
"eval_calibration/coverage@10%": 0.078125,
"eval_calibration/coverage@15%": 0.109375,
"eval_calibration/coverage@20%": 0.109375,
"eval_calibration/coverage@25%": 0.140625,
"eval_calibration/coverage@30%": 0.21875,
"eval_calibration/coverage@5%": 0.078125,
"eval_calibration/ece": 0.1869676292547091,
"eval_calibration/mean_confidence": 0.5198574552595667,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 451.5,
"eval_completions/max_terminated_length": 451.5,
"eval_completions/mean_length": 184.18531799316406,
"eval_completions/mean_terminated_length": 184.18531799316406,
"eval_completions/min_length": 94.5,
"eval_completions/min_terminated_length": 94.5,
"eval_loss": 0.0,
"eval_num_tokens": 502903501.0,
"eval_reward": 0.9559306502342224,
"eval_reward_std": 0.22658731788396835,
"eval_rewards/accuracy_reward": 0.443359375,
"eval_rewards/brier_reward": 0.7953170835971832,
"eval_rewards/confidence_uniqueness_reward": 0.896484375,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0025530497077852488,
"eval_rewards/frontier_coverage_1": 0.20948684215545654,
"eval_rewards/frontier_coverage_10": 0.20948684215545654,
"eval_rewards/frontier_coverage_15": 0.20948684215545654,
"eval_rewards/frontier_coverage_20": 0.20948684215545654,
"eval_rewards/frontier_coverage_25": 0.1534598395228386,
"eval_rewards/frontier_coverage_5": 0.20948684215545654,
"eval_rewards/frontier_ece_reward": 0.010603584349155426,
"eval_runtime": 11.305,
"eval_samples_per_second": 44.228,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4813232421875,
"eval_signal/accuracy_reward/group_std_mean": 0.49823255836963654,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.24066162109375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.24066162109375,
"eval_signal/advantage_abs_mean": 0.20942430198192596,
"eval_signal/advantage_pre_scale_abs_mean": 0.20942430198192596,
"eval_signal/advantage_pre_scale_std": 0.223799467086792,
"eval_signal/advantage_std": 0.223799467086792,
"eval_signal/brier_reward/centered_abs_mean": 0.21622556447982788,
"eval_signal/brier_reward/group_std_mean": 0.2670576274394989,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027028195559978485,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027028195559978485,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0438079833984375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05144248157739639,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054759979248046875,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054759979248046875,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002718214178457856,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004650075454264879,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.865603295911569e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.865603295911569e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3949373662471771,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4775615483522415,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3949373662471771,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4775615483522415,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.3949373662471771,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4775615483522415,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.3949373662471771,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4775615483522415,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.2780953347682953,
"eval_signal/frontier_coverage_25/group_std_mean": 0.34059378504753113,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004977906821295619,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004977906821295619,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3949373662471771,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4775615483522415,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00706937862560153,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.017484422773122787,
"eval_signal/frontier_ece_reward/group_std_mean": 0.021346506662666798,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021855528466403484,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021855528466403484,
"eval_steps_per_second": 0.177,
"step": 150
},
{
"epoch": 0.48,
"step": 150,
"train_probe_calibration/aurc": 0.15857725846094667,
"train_probe_calibration/batch_distribution_entropy": 0.9130259925773516,
"train_probe_calibration/buffer_distribution_entropy": 0.944713675838337,
"train_probe_calibration/confidence_entropy": 0.4207142744029133,
"train_probe_calibration/coverage@0%": 0.125,
"train_probe_calibration/coverage@1%": 0.125,
"train_probe_calibration/coverage@10%": 0.5,
"train_probe_calibration/coverage@15%": 0.65625,
"train_probe_calibration/coverage@20%": 0.75,
"train_probe_calibration/coverage@25%": 0.828125,
"train_probe_calibration/coverage@30%": 0.875,
"train_probe_calibration/coverage@5%": 0.390625,
"train_probe_calibration/ece": 0.20172079164661832,
"train_probe_calibration/mean_confidence": 0.5198955010753575,
"train_probe_completions/clipped_ratio": 0.0,
"train_probe_completions/max_length": 321.5,
"train_probe_completions/max_terminated_length": 321.5,
"train_probe_completions/mean_length": 178.07261657714844,
"train_probe_completions/mean_terminated_length": 178.07261657714844,
"train_probe_completions/min_length": 96.0,
"train_probe_completions/min_terminated_length": 96.0,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 502903501.0,
"train_probe_reward": 1.0524759888648987,
"train_probe_reward_std": 0.20973487198352814,
"train_probe_rewards/accuracy_reward": 0.654296875,
"train_probe_rewards/brier_reward": 0.8249536752700806,
"train_probe_rewards/confidence_uniqueness_reward": 0.891845703125,
"train_probe_rewards/format_reward": 1.0,
"train_probe_rewards/frontier_aurc_reward": -0.0013824773486703634,
"train_probe_rewards/frontier_coverage_1": 0.0867544673383236,
"train_probe_rewards/frontier_coverage_10": 0.0867544673383236,
"train_probe_rewards/frontier_coverage_15": 0.0867544673383236,
"train_probe_rewards/frontier_coverage_20": 0.0867544673383236,
"train_probe_rewards/frontier_coverage_25": 0.06929008662700653,
"train_probe_rewards/frontier_coverage_5": 0.0867544673383236,
"train_probe_rewards/frontier_ece_reward": 0.01398058095946908,
"train_probe_runtime": 8.5257,
"train_probe_samples_per_second": 58.646,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4410400390625,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4765031486749649,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22052001953125,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22052001953125,
"train_probe_signal/advantage_abs_mean": 0.18870525062084198,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.18870525062084198,
"train_probe_signal/advantage_pre_scale_std": 0.20722128450870514,
"train_probe_signal/advantage_std": 0.20722128450870514,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1941361352801323,
"train_probe_signal/brier_reward/group_std_mean": 0.2597276568412781,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024267016910016537,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.024267016910016537,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047119140625,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05483095906674862,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005889892578125,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005889892578125,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0020549558103084564,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0037329471670091152,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6783709219889715e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6783709219889715e-05,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3552343100309372,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4747858941555023,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3552343100309372,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.4747858941555023,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3552343100309372,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.4747858941555023,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3552343100309372,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.4747858941555023,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.24588338285684586,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.33837637305259705,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004401312442496419,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004401312442496419,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3552343100309372,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4747858941555023,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006358693819493055,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.017886138521134853,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.021670137532055378,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022357673151418567,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022357673151418567,
"train_probe_steps_per_second": 0.235
},
{
"calibration/aurc": 0.36937569815613946,
"calibration/batch_distribution_entropy": 0.8804409015073205,
"calibration/buffer_distribution_entropy": 0.9450503390407532,
"calibration/confidence_entropy": 0.405562550806981,
"calibration/coverage@0%": 0.078125,
"calibration/coverage@1%": 0.078125,
"calibration/coverage@10%": 0.14140625,
"calibration/coverage@15%": 0.15390625,
"calibration/coverage@20%": 0.18828125,
"calibration/coverage@25%": 0.3640625,
"calibration/coverage@30%": 0.44765625,
"calibration/coverage@5%": 0.1078125,
"calibration/ece": 0.15152305063456092,
"calibration/mean_confidence": 0.5187107282621319,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 678.6,
"completions/max_terminated_length": 502.6,
"completions/mean_length": 184.14736328125,
"completions/mean_terminated_length": 184.01534423828124,
"completions/min_length": 83.6,
"completions/min_terminated_length": 83.6,
"epoch": 0.496,
"grad_norm": 0.005962767172604799,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 520097010.0,
"reward": 1.044260597229004,
"reward_std": 0.0687633216381073,
"rewards/accuracy_reward": 0.6216796875,
"rewards/brier_reward": 0.8228810787200928,
"rewards/confidence_uniqueness_reward": 0.942884886264801,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0015023096697404982,
"rewards/frontier_coverage_1": 0.1085168793797493,
"rewards/frontier_coverage_10": 0.1085168793797493,
"rewards/frontier_coverage_15": 0.1085168793797493,
"rewards/frontier_coverage_20": 0.1085168793797493,
"rewards/frontier_coverage_25": 0.08267375081777573,
"rewards/frontier_coverage_5": 0.1085168793797493,
"rewards/frontier_ece_reward": 0.01266906913369894,
"signal/accuracy_reward/centered_abs_mean": 0.08406982421875,
"signal/accuracy_reward/group_std_mean": 0.11525466293096542,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042034912109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042034912109375,
"signal/advantage_abs_mean": 0.051340526342391966,
"signal/advantage_pre_scale_abs_mean": 0.051340526342391966,
"signal/advantage_pre_scale_std": 0.0964614674448967,
"signal/advantage_std": 0.0964614674448967,
"signal/brier_reward/centered_abs_mean": 0.11293443143367768,
"signal/brier_reward/group_std_mean": 0.1477597326040268,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01411680392920971,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01411680392920971,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02416303977370262,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030411677807569502,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030203799717128275,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030203799717128275,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001119971019215882,
"signal/frontier_aurc_reward/group_std_mean": 0.0018046426121145487,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0047481302754023e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0047481302754023e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1459894895553589,
"signal/frontier_coverage_1/group_std_mean": 0.1935875177383423,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_10/centered_abs_mean": 0.1459894895553589,
"signal/frontier_coverage_10/group_std_mean": 0.1935875177383423,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_15/centered_abs_mean": 0.1459894895553589,
"signal/frontier_coverage_15/group_std_mean": 0.1935875177383423,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_20/centered_abs_mean": 0.1459894895553589,
"signal/frontier_coverage_20/group_std_mean": 0.1935875177383423,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_25/centered_abs_mean": 0.101905357837677,
"signal/frontier_coverage_25/group_std_mean": 0.135857430100441,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018241058802232145,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018241058802232145,
"signal/frontier_coverage_5/centered_abs_mean": 0.1459894895553589,
"signal/frontier_coverage_5/group_std_mean": 0.1935875177383423,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026132117491215467,
"signal/frontier_ece_reward/centered_abs_mean": 0.011225111037492751,
"signal/frontier_ece_reward/group_std_mean": 0.014030049927532673,
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001403138879686594,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001403138879686594,
"step": 155
},
{
"calibration/aurc": 0.19729564683186535,
"calibration/batch_distribution_entropy": 0.9365230444547216,
"calibration/buffer_distribution_entropy": 0.9458546354861728,
"calibration/confidence_entropy": 0.4251162815208058,
"calibration/coverage@0%": 0.05625,
"calibration/coverage@1%": 0.05625,
"calibration/coverage@10%": 0.409375,
"calibration/coverage@15%": 0.459375,
"calibration/coverage@20%": 0.559375,
"calibration/coverage@25%": 0.634375,
"calibration/coverage@30%": 0.828125,
"calibration/coverage@5%": 0.1625,
"calibration/ece": 0.1667415023265532,
"calibration/mean_confidence": 0.4842607593122642,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 893.0,
"completions/max_terminated_length": 456.8,
"completions/mean_length": 183.923828125,
"completions/mean_terminated_length": 183.6598693847656,
"completions/min_length": 90.2,
"completions/min_terminated_length": 90.2,
"epoch": 0.512,
"grad_norm": 0.0023144185543060303,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 537126054.0,
"reward": 1.0360616207122804,
"reward_std": 0.07599924206733703,
"rewards/accuracy_reward": 0.6048828125,
"rewards/brier_reward": 0.8223283767700196,
"rewards/confidence_uniqueness_reward": 0.9420121669769287,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.001600394258275628,
"rewards/frontier_coverage_1": 0.1132353588938713,
"rewards/frontier_coverage_10": 0.1132353588938713,
"rewards/frontier_coverage_15": 0.1132353588938713,
"rewards/frontier_coverage_20": 0.1132353588938713,
"rewards/frontier_coverage_25": 0.08508779406547547,
"rewards/frontier_coverage_5": 0.1132353588938713,
"rewards/frontier_ece_reward": 0.012370448373258115,
"signal/accuracy_reward/centered_abs_mean": 0.0976318359375,
"signal/accuracy_reward/group_std_mean": 0.13105546683073044,
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04881591796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04881591796875,
"signal/advantage_abs_mean": 0.05740831717848778,
"signal/advantage_pre_scale_abs_mean": 0.05740831717848778,
"signal/advantage_pre_scale_std": 0.10611572861671448,
"signal/advantage_std": 0.10611572861671448,
"signal/brier_reward/centered_abs_mean": 0.1118384689092636,
"signal/brier_reward/group_std_mean": 0.1458802491426468,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01397980861365795,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01397980861365795,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024859635904431344,
"signal/confidence_uniqueness_reward/group_std_mean": 0.031609703600406644,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003107454488053918,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003107454488053918,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012359362561255693,
"signal/frontier_aurc_reward/group_std_mean": 0.001959600206464529,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.212325871369103e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.212325871369103e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14064022451639174,
"signal/frontier_coverage_1/group_std_mean": 0.18756941258907317,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_10/centered_abs_mean": 0.14064022451639174,
"signal/frontier_coverage_10/group_std_mean": 0.18756941258907317,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_15/centered_abs_mean": 0.14064022451639174,
"signal/frontier_coverage_15/group_std_mean": 0.18756941258907317,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_20/centered_abs_mean": 0.14064022451639174,
"signal/frontier_coverage_20/group_std_mean": 0.18756941258907317,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_25/centered_abs_mean": 0.09089281260967255,
"signal/frontier_coverage_25/group_std_mean": 0.12226969897747039,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016269813058897853,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016269813058897853,
"signal/frontier_coverage_5/centered_abs_mean": 0.14064022451639174,
"signal/frontier_coverage_5/group_std_mean": 0.18756941258907317,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002517459914088249,
"signal/frontier_ece_reward/centered_abs_mean": 0.010960309766232967,
"signal/frontier_ece_reward/group_std_mean": 0.01376073807477951,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001370038720779121,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001370038720779121,
"step": 160
},
{
"calibration/aurc": 0.10147454873978934,
"calibration/batch_distribution_entropy": 0.8797335226096754,
"calibration/buffer_distribution_entropy": 0.946214670936134,
"calibration/confidence_entropy": 0.42908605137799893,
"calibration/coverage@0%": 0.190625,
"calibration/coverage@1%": 0.190625,
"calibration/coverage@10%": 0.615625,
"calibration/coverage@15%": 0.740625,
"calibration/coverage@20%": 0.8328125,
"calibration/coverage@25%": 0.8875,
"calibration/coverage@30%": 0.9671875,
"calibration/coverage@5%": 0.4078125,
"calibration/ece": 0.15184928203773357,
"calibration/mean_confidence": 0.6085124254121397,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 697.0,
"completions/max_terminated_length": 519.8,
"completions/mean_length": 186.444140625,
"completions/mean_terminated_length": 186.31230773925782,
"completions/min_length": 93.8,
"completions/min_terminated_length": 93.8,
"epoch": 0.528,
"grad_norm": 0.0019444272620603442,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 554064778.0,
"reward": 1.0289855241775512,
"reward_std": 0.07206702530384064,
"rewards/accuracy_reward": 0.58603515625,
"rewards/brier_reward": 0.8264921069145202,
"rewards/confidence_uniqueness_reward": 0.9401045680046082,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0016523070633411407,
"rewards/frontier_coverage_1": 0.13403759896755219,
"rewards/frontier_coverage_10": 0.13403759896755219,
"rewards/frontier_coverage_15": 0.13403759896755219,
"rewards/frontier_coverage_20": 0.13403759896755219,
"rewards/frontier_coverage_25": 0.09650920405983925,
"rewards/frontier_coverage_5": 0.13403759896755219,
"rewards/frontier_ece_reward": 0.011983232945203781,
"signal/accuracy_reward/centered_abs_mean": 0.096856689453125,
"signal/accuracy_reward/group_std_mean": 0.13027185052633286,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0484283447265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0484283447265625,
"signal/advantage_abs_mean": 0.05456642434000969,
"signal/advantage_pre_scale_abs_mean": 0.05456642434000969,
"signal/advantage_pre_scale_std": 0.10015368908643722,
"signal/advantage_std": 0.10015368908643722,
"signal/brier_reward/centered_abs_mean": 0.11027712374925613,
"signal/brier_reward/group_std_mean": 0.14253330528736113,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013784640468657017,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013784640468657017,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025565633177757265,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03242117166519165,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003195704147219658,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003195704147219658,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011575968354009091,
"signal/frontier_aurc_reward/group_std_mean": 0.001790312142111361,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0720982865896077e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0720982865896077e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15162838697433473,
"signal/frontier_coverage_1/group_std_mean": 0.19694490134716033,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_10/centered_abs_mean": 0.15162838697433473,
"signal/frontier_coverage_10/group_std_mean": 0.19694490134716033,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_15/centered_abs_mean": 0.15162838697433473,
"signal/frontier_coverage_15/group_std_mean": 0.19694490134716033,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_20/centered_abs_mean": 0.15162838697433473,
"signal/frontier_coverage_20/group_std_mean": 0.19694490134716033,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_25/centered_abs_mean": 0.09777042716741562,
"signal/frontier_coverage_25/group_std_mean": 0.12786214500665666,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017500906018540264,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017500906018540264,
"signal/frontier_coverage_5/centered_abs_mean": 0.15162838697433473,
"signal/frontier_coverage_5/group_std_mean": 0.19694490134716033,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027141480706632136,
"signal/frontier_ece_reward/centered_abs_mean": 0.00985901989042759,
"signal/frontier_ece_reward/group_std_mean": 0.012534209899604321,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012323774863034487,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012323774863034487,
"step": 165
},
{
"calibration/aurc": 0.17559047792121124,
"calibration/batch_distribution_entropy": 0.8843507468483722,
"calibration/buffer_distribution_entropy": 0.9468903060077588,
"calibration/confidence_entropy": 0.4229738822725511,
"calibration/coverage@0%": 0.1265655637254902,
"calibration/coverage@1%": 0.14765931372549018,
"calibration/coverage@10%": 0.48027267156862746,
"calibration/coverage@15%": 0.5803094362745098,
"calibration/coverage@20%": 0.6623805147058823,
"calibration/coverage@25%": 0.7264950980392156,
"calibration/coverage@30%": 0.789828431372549,
"calibration/coverage@5%": 0.2242218137254902,
"calibration/ece": 0.11771892982943855,
"calibration/mean_confidence": 0.6129658324860427,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 566.6,
"completions/max_terminated_length": 566.6,
"completions/mean_length": 188.92958984375,
"completions/mean_terminated_length": 188.92958984375,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.544,
"grad_norm": 0.0022145204711705446,
"learning_rate": 1e-06,
"loss": -0.0,
"num_tokens": 571163001.0,
"reward": 1.0449440240859986,
"reward_std": 0.0750869557261467,
"rewards/accuracy_reward": 0.62900390625,
"rewards/brier_reward": 0.8198571324348449,
"rewards/confidence_uniqueness_reward": 0.9451698303222656,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.001585884322412312,
"rewards/frontier_coverage_1": 0.08185996562242508,
"rewards/frontier_coverage_10": 0.08185996562242508,
"rewards/frontier_coverage_15": 0.08185996562242508,
"rewards/frontier_coverage_20": 0.08185996562242508,
"rewards/frontier_coverage_25": 0.0604823037981987,
"rewards/frontier_coverage_5": 0.08185996562242508,
"rewards/frontier_ece_reward": 0.011854531429708003,
"signal/accuracy_reward/centered_abs_mean": 0.102349853515625,
"signal/accuracy_reward/group_std_mean": 0.13563383221626282,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0511749267578125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0511749267578125,
"signal/advantage_abs_mean": 0.05685669779777527,
"signal/advantage_pre_scale_abs_mean": 0.05685669779777527,
"signal/advantage_pre_scale_std": 0.10516398698091507,
"signal/advantage_std": 0.10516398698091507,
"signal/brier_reward/centered_abs_mean": 0.11613436192274093,
"signal/brier_reward/group_std_mean": 0.14911974966526031,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014516795240342616,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014516795240342616,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02417031079530716,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03060316704213619,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003021288849413395,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003021288849413395,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012515761191025376,
"signal/frontier_aurc_reward/group_std_mean": 0.001968202483840287,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.240321155113634e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.240321155113634e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14715155959129333,
"signal/frontier_coverage_1/group_std_mean": 0.19198558628559112,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_10/centered_abs_mean": 0.14715155959129333,
"signal/frontier_coverage_10/group_std_mean": 0.19198558628559112,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_15/centered_abs_mean": 0.14715155959129333,
"signal/frontier_coverage_15/group_std_mean": 0.19198558628559112,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_20/centered_abs_mean": 0.14715155959129333,
"signal/frontier_coverage_20/group_std_mean": 0.19198558628559112,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_25/centered_abs_mean": 0.09022901803255082,
"signal/frontier_coverage_25/group_std_mean": 0.11834533214569092,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016150993760675192,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016150993760675192,
"signal/frontier_coverage_5/centered_abs_mean": 0.14715155959129333,
"signal/frontier_coverage_5/group_std_mean": 0.19198558628559112,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026340128388255835,
"signal/frontier_ece_reward/centered_abs_mean": 0.010609462484717369,
"signal/frontier_ece_reward/group_std_mean": 0.01322672814130783,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001326182810589671,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001326182810589671,
"step": 170
},
{
"calibration/aurc": 0.22587778042468196,
"calibration/batch_distribution_entropy": 0.9286465435216072,
"calibration/buffer_distribution_entropy": 0.9467257565889398,
"calibration/confidence_entropy": 0.4432571774263129,
"calibration/coverage@0%": 0.015625,
"calibration/coverage@1%": 0.015625,
"calibration/coverage@10%": 0.42265625,
"calibration/coverage@15%": 0.47109375,
"calibration/coverage@20%": 0.49453125,
"calibration/coverage@25%": 0.65859375,
"calibration/coverage@30%": 0.71171875,
"calibration/coverage@5%": 0.26875,
"calibration/ece": 0.14173538803059738,
"calibration/mean_confidence": 0.5470402575486807,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 657.2,
"completions/max_terminated_length": 440.8,
"completions/mean_length": 186.93251953125,
"completions/mean_terminated_length": 186.66925354003905,
"completions/min_length": 89.4,
"completions/min_terminated_length": 89.4,
"epoch": 0.56,
"grad_norm": 0.0018579477909952402,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 587898598.0,
"reward": 1.0260156869888306,
"reward_std": 0.0709751732647419,
"rewards/accuracy_reward": 0.57705078125,
"rewards/brier_reward": 0.8331053018569946,
"rewards/confidence_uniqueness_reward": 0.9434573888778687,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0018543781014159321,
"rewards/frontier_coverage_1": 0.13779235035181045,
"rewards/frontier_coverage_10": 0.13779235035181045,
"rewards/frontier_coverage_15": 0.13779235035181045,
"rewards/frontier_coverage_20": 0.13634179830551146,
"rewards/frontier_coverage_25": 0.09466939568519592,
"rewards/frontier_coverage_5": 0.13779235035181045,
"rewards/frontier_ece_reward": 0.012398156523704528,
"signal/accuracy_reward/centered_abs_mean": 0.079791259765625,
"signal/accuracy_reward/group_std_mean": 0.1126218855381012,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0398956298828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0398956298828125,
"signal/advantage_abs_mean": 0.05201718434691429,
"signal/advantage_pre_scale_abs_mean": 0.05201718434691429,
"signal/advantage_pre_scale_std": 0.09894705563783646,
"signal/advantage_std": 0.09894705563783646,
"signal/brier_reward/centered_abs_mean": 0.1116187259554863,
"signal/brier_reward/group_std_mean": 0.14699228405952453,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013952340744435788,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013952340744435788,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02636619359254837,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033408934623003005,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032957741990685464,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032957741990685464,
"signal/format_reward/centered_abs_mean": 0.0003662109375,
"signal/format_reward/group_std_mean": 0.000768545875325799,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00018310546875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014577839057892561,
"signal/frontier_aurc_reward/group_std_mean": 0.0023339309729635714,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6094331042259e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6094331042259e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13344906717538835,
"signal/frontier_coverage_1/group_std_mean": 0.17753869891166688,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002388738188892603,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002388738188892603,
"signal/frontier_coverage_10/centered_abs_mean": 0.13344906717538835,
"signal/frontier_coverage_10/group_std_mean": 0.17753869891166688,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002388738188892603,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002388738188892603,
"signal/frontier_coverage_15/centered_abs_mean": 0.13344906717538835,
"signal/frontier_coverage_15/group_std_mean": 0.17753869891166688,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002388738188892603,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002388738188892603,
"signal/frontier_coverage_20/centered_abs_mean": 0.1315935179591179,
"signal/frontier_coverage_20/group_std_mean": 0.1751266449689865,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002355523919686675,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002355523919686675,
"signal/frontier_coverage_25/centered_abs_mean": 0.08300138115882874,
"signal/frontier_coverage_25/group_std_mean": 0.11093302965164184,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00148572470061481,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00148572470061481,
"signal/frontier_coverage_5/centered_abs_mean": 0.13344906717538835,
"signal/frontier_coverage_5/group_std_mean": 0.17753869891166688,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002388738188892603,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002388738188892603,
"signal/frontier_ece_reward/centered_abs_mean": 0.009788069687783719,
"signal/frontier_ece_reward/group_std_mean": 0.012408962100744247,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012235087109729649,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012235087109729649,
"step": 175
},
{
"calibration/aurc": 0.2964656946460727,
"calibration/batch_distribution_entropy": 0.9168176730321715,
"calibration/buffer_distribution_entropy": 0.9476380754401748,
"calibration/confidence_entropy": 0.4257376662970197,
"calibration/coverage@0%": 0.11015625,
"calibration/coverage@1%": 0.1140625,
"calibration/coverage@10%": 0.23984375,
"calibration/coverage@15%": 0.32734375,
"calibration/coverage@20%": 0.3671875,
"calibration/coverage@25%": 0.48046875,
"calibration/coverage@30%": 0.578125,
"calibration/coverage@5%": 0.165625,
"calibration/ece": 0.1658030474792637,
"calibration/mean_confidence": 0.5411404430872265,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1305.2,
"completions/max_terminated_length": 468.2,
"completions/mean_length": 187.0533203125,
"completions/mean_terminated_length": 186.39432373046876,
"completions/min_length": 68.0,
"completions/min_terminated_length": 68.0,
"epoch": 0.576,
"grad_norm": 0.3020451068878174,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 605000648.0,
"reward": 1.030449903011322,
"reward_std": 0.06246692091226578,
"rewards/accuracy_reward": 0.59111328125,
"rewards/brier_reward": 0.8259658694267273,
"rewards/confidence_uniqueness_reward": 0.9397091507911682,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.0018171647796407342,
"rewards/frontier_coverage_1": 0.1292146548628807,
"rewards/frontier_coverage_10": 0.1292146548628807,
"rewards/frontier_coverage_15": 0.1292146548628807,
"rewards/frontier_coverage_20": 0.12564596012234688,
"rewards/frontier_coverage_25": 0.08896546289324761,
"rewards/frontier_coverage_5": 0.1292146548628807,
"rewards/frontier_ece_reward": 0.011719273403286934,
"signal/accuracy_reward/centered_abs_mean": 0.066680908203125,
"signal/accuracy_reward/group_std_mean": 0.09644376039505005,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0333404541015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0333404541015625,
"signal/advantage_abs_mean": 0.04487398453056812,
"signal/advantage_pre_scale_abs_mean": 0.04487398453056812,
"signal/advantage_pre_scale_std": 0.08947417140007019,
"signal/advantage_std": 0.08947417140007019,
"signal/brier_reward/centered_abs_mean": 0.10441422760486603,
"signal/brier_reward/group_std_mean": 0.13690564334392546,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013051778450608254,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013051778450608254,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028500469401478767,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03677135743200779,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003562558675184846,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003562558675184846,
"signal/format_reward/centered_abs_mean": 0.001300048828125,
"signal/format_reward/group_std_mean": 0.0031943732406944036,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012849176069721579,
"signal/frontier_aurc_reward/group_std_mean": 0.001977930567227304,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3000024521024896e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3000024521024896e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13109023869037628,
"signal/frontier_coverage_1/group_std_mean": 0.1723720222711563,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023465151432901623,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023465151432901623,
"signal/frontier_coverage_10/centered_abs_mean": 0.13109023869037628,
"signal/frontier_coverage_10/group_std_mean": 0.1723720222711563,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023465151432901623,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023465151432901623,
"signal/frontier_coverage_15/centered_abs_mean": 0.13109023869037628,
"signal/frontier_coverage_15/group_std_mean": 0.1723720222711563,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023465151432901623,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023465151432901623,
"signal/frontier_coverage_20/centered_abs_mean": 0.1257859319448471,
"signal/frontier_coverage_20/group_std_mean": 0.16545325815677642,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022515680640935896,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022515680640935896,
"signal/frontier_coverage_25/centered_abs_mean": 0.08115749582648277,
"signal/frontier_coverage_25/group_std_mean": 0.10648612678050995,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014527191407978535,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014527191407978535,
"signal/frontier_coverage_5/centered_abs_mean": 0.13109023869037628,
"signal/frontier_coverage_5/group_std_mean": 0.1723720222711563,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023465151432901623,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023465151432901623,
"signal/frontier_ece_reward/centered_abs_mean": 0.00884333048015833,
"signal/frontier_ece_reward/group_std_mean": 0.011235564388334751,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011054163100197912,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011054163100197912,
"step": 180
},
{
"calibration/aurc": 0.3515624446654767,
"calibration/batch_distribution_entropy": 0.8888446214443768,
"calibration/buffer_distribution_entropy": 0.9482682908968807,
"calibration/confidence_entropy": 0.4005931618908181,
"calibration/coverage@0%": 0.07814644607843138,
"calibration/coverage@1%": 0.07814644607843138,
"calibration/coverage@10%": 0.252469362745098,
"calibration/coverage@15%": 0.3150183823529412,
"calibration/coverage@20%": 0.3674172794117647,
"calibration/coverage@25%": 0.41276654411764707,
"calibration/coverage@30%": 0.5190686274509804,
"calibration/coverage@5%": 0.15006740196078433,
"calibration/ece": 0.12824287845261145,
"calibration/mean_confidence": 0.5009771199979778,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1110.8,
"completions/max_terminated_length": 451.4,
"completions/mean_length": 186.34189453125,
"completions/mean_terminated_length": 185.94619750976562,
"completions/min_length": 85.4,
"completions/min_terminated_length": 85.4,
"epoch": 0.592,
"grad_norm": 0.0023227103520184755,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 622076501.0,
"reward": 1.0253287315368653,
"reward_std": 0.07493966221809387,
"rewards/accuracy_reward": 0.584375,
"rewards/brier_reward": 0.8193248152732849,
"rewards/confidence_uniqueness_reward": 0.9278930783271789,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0017773719038814307,
"rewards/frontier_coverage_1": 0.1330685704946518,
"rewards/frontier_coverage_10": 0.1330685704946518,
"rewards/frontier_coverage_15": 0.1330685704946518,
"rewards/frontier_coverage_20": 0.1282924994826317,
"rewards/frontier_coverage_25": 0.0903812974691391,
"rewards/frontier_coverage_5": 0.1330685704946518,
"rewards/frontier_ece_reward": 0.011802474223077297,
"signal/accuracy_reward/centered_abs_mean": 0.09986572265625,
"signal/accuracy_reward/group_std_mean": 0.1311745300889015,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049932861328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049932861328125,
"signal/advantage_abs_mean": 0.05721670612692833,
"signal/advantage_pre_scale_abs_mean": 0.05721670612692833,
"signal/advantage_pre_scale_std": 0.10559385418891906,
"signal/advantage_std": 0.10559385418891906,
"signal/brier_reward/centered_abs_mean": 0.11419818848371506,
"signal/brier_reward/group_std_mean": 0.14777041971683502,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014274773560464383,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014274773560464383,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03568760454654694,
"signal/confidence_uniqueness_reward/group_std_mean": 0.04578934088349342,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004460950568318367,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004460950568318367,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014281244948506355,
"signal/frontier_aurc_reward/group_std_mean": 0.0022319577634334563,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.55634276982164e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.55634276982164e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14765068590641023,
"signal/frontier_coverage_1/group_std_mean": 0.1923435479402542,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026429472491145134,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026429472491145134,
"signal/frontier_coverage_10/centered_abs_mean": 0.14765068590641023,
"signal/frontier_coverage_10/group_std_mean": 0.1923435479402542,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026429472491145134,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026429472491145134,
"signal/frontier_coverage_15/centered_abs_mean": 0.14765068590641023,
"signal/frontier_coverage_15/group_std_mean": 0.1923435479402542,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026429472491145134,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026429472491145134,
"signal/frontier_coverage_20/centered_abs_mean": 0.13963269293308259,
"signal/frontier_coverage_20/group_std_mean": 0.18203844726085663,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002499425271525979,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002499425271525979,
"signal/frontier_coverage_25/centered_abs_mean": 0.08843920975923539,
"signal/frontier_coverage_25/group_std_mean": 0.11563192903995514,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015830618096515537,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015830618096515537,
"signal/frontier_coverage_5/centered_abs_mean": 0.14765068590641023,
"signal/frontier_coverage_5/group_std_mean": 0.1923435479402542,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026429472491145134,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026429472491145134,
"signal/frontier_ece_reward/centered_abs_mean": 0.008998825587332249,
"signal/frontier_ece_reward/group_std_mean": 0.011416062340140342,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011248531984165311,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011248531984165311,
"step": 185
},
{
"calibration/aurc": 0.19760200424062405,
"calibration/batch_distribution_entropy": 0.843288138266779,
"calibration/buffer_distribution_entropy": 0.9480241421807527,
"calibration/confidence_entropy": 0.3636719512707667,
"calibration/coverage@0%": 0.2765625,
"calibration/coverage@1%": 0.2984375,
"calibration/coverage@10%": 0.5328125,
"calibration/coverage@15%": 0.6515625,
"calibration/coverage@20%": 0.70703125,
"calibration/coverage@25%": 0.75,
"calibration/coverage@30%": 0.78125,
"calibration/coverage@5%": 0.44375,
"calibration/ece": 0.16141309983740632,
"calibration/mean_confidence": 0.5030459626625937,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 570.6,
"completions/max_terminated_length": 570.6,
"completions/mean_length": 187.1,
"completions/mean_terminated_length": 187.1,
"completions/min_length": 89.4,
"completions/min_terminated_length": 89.4,
"epoch": 0.608,
"grad_norm": 0.0017206113552674651,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 638991893.0,
"reward": 1.0279302835464477,
"reward_std": 0.05821175277233124,
"rewards/accuracy_reward": 0.5734375,
"rewards/brier_reward": 0.8467367172241211,
"rewards/confidence_uniqueness_reward": 0.9279510498046875,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.001405553543008864,
"rewards/frontier_coverage_1": 0.17639144659042358,
"rewards/frontier_coverage_10": 0.17639144659042358,
"rewards/frontier_coverage_15": 0.17639144659042358,
"rewards/frontier_coverage_20": 0.17281466871500015,
"rewards/frontier_coverage_25": 0.11612609624862671,
"rewards/frontier_coverage_5": 0.17639144659042358,
"rewards/frontier_ece_reward": 0.012792413122951984,
"signal/accuracy_reward/centered_abs_mean": 0.081982421875,
"signal/accuracy_reward/group_std_mean": 0.10979770123958588,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409912109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0409912109375,
"signal/advantage_abs_mean": 0.043833667784929274,
"signal/advantage_pre_scale_abs_mean": 0.043833667784929274,
"signal/advantage_pre_scale_std": 0.08639424741268158,
"signal/advantage_std": 0.08639424741268158,
"signal/brier_reward/centered_abs_mean": 0.10321188867092132,
"signal/brier_reward/group_std_mean": 0.1343301758170128,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012901486083865165,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012901486083865165,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03328895568847656,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0417523019015789,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00416111946105957,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00416111946105957,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0010029817116446794,
"signal/frontier_aurc_reward/group_std_mean": 0.0015215349150821567,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7953372116608078e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7953372116608078e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14954022765159608,
"signal/frontier_coverage_1/group_std_mean": 0.19398094117641448,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002676770044490695,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002676770044490695,
"signal/frontier_coverage_10/centered_abs_mean": 0.14954022765159608,
"signal/frontier_coverage_10/group_std_mean": 0.19398094117641448,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002676770044490695,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002676770044490695,
"signal/frontier_coverage_15/centered_abs_mean": 0.14954022765159608,
"signal/frontier_coverage_15/group_std_mean": 0.19398094117641448,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002676770044490695,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002676770044490695,
"signal/frontier_coverage_20/centered_abs_mean": 0.14172202944755555,
"signal/frontier_coverage_20/group_std_mean": 0.18404050469398497,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002536824205890298,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002536824205890298,
"signal/frontier_coverage_25/centered_abs_mean": 0.08968007564544678,
"signal/frontier_coverage_25/group_std_mean": 0.11650702059268951,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016052733408287168,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016052733408287168,
"signal/frontier_coverage_5/centered_abs_mean": 0.14954022765159608,
"signal/frontier_coverage_5/group_std_mean": 0.19398094117641448,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002676770044490695,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002676770044490695,
"signal/frontier_ece_reward/centered_abs_mean": 0.008176222257316113,
"signal/frontier_ece_reward/group_std_mean": 0.010366989858448505,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010220277821645142,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010220277821645142,
"step": 190
},
{
"calibration/aurc": 0.16803935733946962,
"calibration/batch_distribution_entropy": 0.8950542863389088,
"calibration/buffer_distribution_entropy": 0.9478421140046102,
"calibration/confidence_entropy": 0.4051885790306386,
"calibration/coverage@0%": 0.16640625,
"calibration/coverage@1%": 0.17109375,
"calibration/coverage@10%": 0.540625,
"calibration/coverage@15%": 0.609375,
"calibration/coverage@20%": 0.67265625,
"calibration/coverage@25%": 0.71484375,
"calibration/coverage@30%": 0.75390625,
"calibration/coverage@5%": 0.41953125,
"calibration/ece": 0.11327187991929444,
"calibration/mean_confidence": 0.5317124950807055,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 713.6,
"completions/max_terminated_length": 501.8,
"completions/mean_length": 188.14892578125,
"completions/mean_terminated_length": 188.01675415039062,
"completions/min_length": 91.0,
"completions/min_terminated_length": 91.0,
"epoch": 0.624,
"grad_norm": 0.0019219900714233518,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 656262442.0,
"reward": 1.0314565539360045,
"reward_std": 0.06776490807533264,
"rewards/accuracy_reward": 0.58193359375,
"rewards/brier_reward": 0.8456698775291442,
"rewards/confidence_uniqueness_reward": 0.93500657081604,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0015101159922778606,
"rewards/frontier_coverage_1": 0.16571835279464722,
"rewards/frontier_coverage_10": 0.16571835279464722,
"rewards/frontier_coverage_15": 0.16571835279464722,
"rewards/frontier_coverage_20": 0.15580750107765198,
"rewards/frontier_coverage_25": 0.10798413306474686,
"rewards/frontier_coverage_5": 0.16571835279464722,
"rewards/frontier_ece_reward": 0.011540688015520573,
"signal/accuracy_reward/centered_abs_mean": 0.094268798828125,
"signal/accuracy_reward/group_std_mean": 0.12409499287605286,
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0471343994140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0471343994140625,
"signal/advantage_abs_mean": 0.05176782011985779,
"signal/advantage_pre_scale_abs_mean": 0.05176782011985779,
"signal/advantage_pre_scale_std": 0.09750174582004548,
"signal/advantage_std": 0.09750174582004548,
"signal/brier_reward/centered_abs_mean": 0.10846467316150665,
"signal/brier_reward/group_std_mean": 0.14180308282375337,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013558084145188332,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013558084145188332,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.030897776782512664,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03884159214794636,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003862222097814083,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003862222097814083,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011383457691408693,
"signal/frontier_aurc_reward/group_std_mean": 0.0017884798115119338,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.03763887839159e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.03763887839159e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14944371283054353,
"signal/frontier_coverage_1/group_std_mean": 0.19577408730983734,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002675042301416397,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002675042301416397,
"signal/frontier_coverage_10/centered_abs_mean": 0.14944371283054353,
"signal/frontier_coverage_10/group_std_mean": 0.19577408730983734,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002675042301416397,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002675042301416397,
"signal/frontier_coverage_15/centered_abs_mean": 0.14944371283054353,
"signal/frontier_coverage_15/group_std_mean": 0.19577408730983734,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002675042301416397,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002675042301416397,
"signal/frontier_coverage_20/centered_abs_mean": 0.1349347472190857,
"signal/frontier_coverage_20/group_std_mean": 0.17690467536449433,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002415331965312362,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002415331965312362,
"signal/frontier_coverage_25/centered_abs_mean": 0.08353340923786164,
"signal/frontier_coverage_25/group_std_mean": 0.10984267294406891,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014952480327337981,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014952480327337981,
"signal/frontier_coverage_5/centered_abs_mean": 0.14944371283054353,
"signal/frontier_coverage_5/group_std_mean": 0.19577408730983734,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002675042301416397,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002675042301416397,
"signal/frontier_ece_reward/centered_abs_mean": 0.007968425843864679,
"signal/frontier_ece_reward/group_std_mean": 0.010179330036044121,
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009960532304830849,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009960532304830849,
"step": 195
},
{
"calibration/aurc": 0.20889160874971036,
"calibration/batch_distribution_entropy": 0.9089865199203135,
"calibration/buffer_distribution_entropy": 0.9479906632560786,
"calibration/confidence_entropy": 0.4287082953337954,
"calibration/coverage@0%": 0.09921875,
"calibration/coverage@1%": 0.11796875,
"calibration/coverage@10%": 0.40703125,
"calibration/coverage@15%": 0.46484375,
"calibration/coverage@20%": 0.63671875,
"calibration/coverage@25%": 0.6953125,
"calibration/coverage@30%": 0.75078125,
"calibration/coverage@5%": 0.3515625,
"calibration/ece": 0.1674316742601562,
"calibration/mean_confidence": 0.5789995757398437,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 869.4,
"completions/max_terminated_length": 445.4,
"completions/mean_length": 196.52490234375,
"completions/mean_terminated_length": 196.00135192871093,
"completions/min_length": 96.2,
"completions/min_terminated_length": 96.2,
"epoch": 0.64,
"grad_norm": 0.0018030045321211219,
"learning_rate": 1e-06,
"loss": 0.0012,
"num_tokens": 673617545.0,
"reward": 1.0461254596710206,
"reward_std": 0.057253798097372056,
"rewards/accuracy_reward": 0.6255859375,
"rewards/brier_reward": 0.8291385531425476,
"rewards/confidence_uniqueness_reward": 0.9392925262451172,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.001513039879500866,
"rewards/frontier_coverage_1": 0.11257308274507523,
"rewards/frontier_coverage_10": 0.11257308274507523,
"rewards/frontier_coverage_15": 0.11257308274507523,
"rewards/frontier_coverage_20": 0.10006719529628753,
"rewards/frontier_coverage_25": 0.07489581555128097,
"rewards/frontier_coverage_5": 0.11257308274507523,
"rewards/frontier_ece_reward": 0.010471446067094803,
"signal/accuracy_reward/centered_abs_mean": 0.06611328125,
"signal/accuracy_reward/group_std_mean": 0.09317785650491714,
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.033056640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.033056640625,
"signal/advantage_abs_mean": 0.04163134917616844,
"signal/advantage_pre_scale_abs_mean": 0.04163134917616844,
"signal/advantage_pre_scale_std": 0.0858407735824585,
"signal/advantage_std": 0.0858407735824585,
"signal/brier_reward/centered_abs_mean": 0.09937669783830642,
"signal/brier_reward/group_std_mean": 0.13064824044704437,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012422087229788303,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012422087229788303,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02863082177937031,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03645128607749939,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035788527224212886,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035788527224212886,
"signal/format_reward/centered_abs_mean": 0.00072021484375,
"signal/format_reward/group_std_mean": 0.0014778789598494768,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000360107421875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000360107421875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011738982051610948,
"signal/frontier_aurc_reward/group_std_mean": 0.0018677733605727553,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1012776051065886e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1012776051065886e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12463102638721466,
"signal/frontier_coverage_1/group_std_mean": 0.16538253724575042,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022308954037725927,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022308954037725927,
"signal/frontier_coverage_10/centered_abs_mean": 0.12463102638721466,
"signal/frontier_coverage_10/group_std_mean": 0.16538253724575042,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022308954037725927,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022308954037725927,
"signal/frontier_coverage_15/centered_abs_mean": 0.12463102638721466,
"signal/frontier_coverage_15/group_std_mean": 0.16538253724575042,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022308954037725927,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022308954037725927,
"signal/frontier_coverage_20/centered_abs_mean": 0.10897718667984009,
"signal/frontier_coverage_20/group_std_mean": 0.1449252337217331,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019506915938109159,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019506915938109159,
"signal/frontier_coverage_25/centered_abs_mean": 0.06840595453977585,
"signal/frontier_coverage_25/group_std_mean": 0.0906538799405098,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001224466529674828,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001224466529674828,
"signal/frontier_coverage_5/centered_abs_mean": 0.12463102638721466,
"signal/frontier_coverage_5/group_std_mean": 0.16538253724575042,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022308954037725927,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022308954037725927,
"signal/frontier_ece_reward/centered_abs_mean": 0.007416488416492939,
"signal/frontier_ece_reward/group_std_mean": 0.009534438140690327,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009270610520616174,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009270610520616174,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.3420038089344068,
"eval_calibration/batch_distribution_entropy": 0.9040885318071357,
"eval_calibration/buffer_distribution_entropy": 0.9499923798968546,
"eval_calibration/confidence_entropy": 0.43239845984155695,
"eval_calibration/coverage@0%": 0.125,
"eval_calibration/coverage@1%": 0.125,
"eval_calibration/coverage@10%": 0.203125,
"eval_calibration/coverage@15%": 0.203125,
"eval_calibration/coverage@20%": 0.28125,
"eval_calibration/coverage@25%": 0.359375,
"eval_calibration/coverage@30%": 0.390625,
"eval_calibration/coverage@5%": 0.125,
"eval_calibration/ece": 0.13722656249999998,
"eval_calibration/mean_confidence": 0.4819140625,
"eval_completions/clipped_ratio": 0.001953125,
"eval_completions/max_length": 964.5,
"eval_completions/max_terminated_length": 382.0,
"eval_completions/mean_length": 200.27852630615234,
"eval_completions/mean_terminated_length": 197.66400146484375,
"eval_completions/min_length": 107.0,
"eval_completions/min_terminated_length": 107.0,
"eval_loss": 0.0,
"eval_num_tokens": 673617545.0,
"eval_reward": 0.9530201256275177,
"eval_reward_std": 0.23500938713550568,
"eval_rewards/accuracy_reward": 0.439453125,
"eval_rewards/brier_reward": 0.8035316169261932,
"eval_rewards/confidence_uniqueness_reward": 0.889798104763031,
"eval_rewards/format_reward": 0.998046875,
"eval_rewards/frontier_aurc_reward": -0.002739873481914401,
"eval_rewards/frontier_coverage_1": 0.22667521983385086,
"eval_rewards/frontier_coverage_10": 0.22667521983385086,
"eval_rewards/frontier_coverage_15": 0.22667521983385086,
"eval_rewards/frontier_coverage_20": 0.1793447956442833,
"eval_rewards/frontier_coverage_25": 0.11752147227525711,
"eval_rewards/frontier_coverage_5": 0.22667521983385086,
"eval_rewards/frontier_ece_reward": 0.008872916921973228,
"eval_runtime": 19.7075,
"eval_samples_per_second": 25.371,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4754638671875,
"eval_signal/accuracy_reward/group_std_mean": 0.49497611820697784,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23773193359375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23773193359375,
"eval_signal/advantage_abs_mean": 0.21505261212587357,
"eval_signal/advantage_pre_scale_abs_mean": 0.21505261212587357,
"eval_signal/advantage_pre_scale_std": 0.2322075515985489,
"eval_signal/advantage_std": 0.2322075515985489,
"eval_signal/brier_reward/centered_abs_mean": 0.22212185710668564,
"eval_signal/brier_reward/group_std_mean": 0.27625299990177155,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027765232138335705,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.027765232138335705,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04621247202157974,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.059811294078826904,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005776559002697468,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005776559002697468,
"eval_signal/format_reward/centered_abs_mean": 0.0037841796875,
"eval_signal/format_reward/group_std_mean": 0.011048543266952038,
"eval_signal/format_reward/group_zero_std_frac": 0.9375,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003157320083118975,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.005664329044520855,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6516028053010814e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6516028053010814e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.382648304104805,
"eval_signal/frontier_coverage_1/group_std_mean": 0.46173766255378723,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0068494039587676525,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0068494039587676525,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.382648304104805,
"eval_signal/frontier_coverage_10/group_std_mean": 0.46173766255378723,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0068494039587676525,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0068494039587676525,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.382648304104805,
"eval_signal/frontier_coverage_15/group_std_mean": 0.46173766255378723,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0068494039587676525,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0068494039587676525,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.2975280433893204,
"eval_signal/frontier_coverage_20/group_std_mean": 0.36239591240882874,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005325751379132271,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005325751379132271,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1779879480600357,
"eval_signal/frontier_coverage_25/group_std_mean": 0.22136619687080383,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031859842129051685,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031859842129051685,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.382648304104805,
"eval_signal/frontier_coverage_5/group_std_mean": 0.46173766255378723,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0068494039587676525,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0068494039587676525,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.013139450456947088,
"eval_signal/frontier_ece_reward/group_std_mean": 0.015696686692535877,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001642431307118386,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001642431307118386,
"eval_steps_per_second": 0.101,
"step": 200
},
{
"epoch": 0.64,
"step": 200,
"train_probe_calibration/aurc": 0.14380049777966852,
"train_probe_calibration/batch_distribution_entropy": 0.8924841198751866,
"train_probe_calibration/buffer_distribution_entropy": 0.9501962946765351,
"train_probe_calibration/confidence_entropy": 0.4064653950766257,
"train_probe_calibration/coverage@0%": 0.0625,
"train_probe_calibration/coverage@1%": 0.0625,
"train_probe_calibration/coverage@10%": 0.6875,
"train_probe_calibration/coverage@15%": 0.796875,
"train_probe_calibration/coverage@20%": 0.875,
"train_probe_calibration/coverage@25%": 0.921875,
"train_probe_calibration/coverage@30%": 0.96875,
"train_probe_calibration/coverage@5%": 0.0625,
"train_probe_calibration/ece": 0.21375468749999998,
"train_probe_calibration/mean_confidence": 0.5793390625,
"train_probe_completions/clipped_ratio": 0.0,
"train_probe_completions/max_length": 316.5,
"train_probe_completions/max_terminated_length": 316.5,
"train_probe_completions/mean_length": 195.48914337158203,
"train_probe_completions/mean_terminated_length": 195.48914337158203,
"train_probe_completions/min_length": 101.0,
"train_probe_completions/min_terminated_length": 101.0,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 673617545.0,
"train_probe_reward": 1.0627794861793518,
"train_probe_reward_std": 0.2138308882713318,
"train_probe_rewards/accuracy_reward": 0.66796875,
"train_probe_rewards/brier_reward": 0.8411527872085571,
"train_probe_rewards/confidence_uniqueness_reward": 0.90185546875,
"train_probe_rewards/format_reward": 1.0,
"train_probe_rewards/frontier_aurc_reward": -0.0011366689577698708,
"train_probe_rewards/frontier_coverage_1": 0.09746142104268074,
"train_probe_rewards/frontier_coverage_10": 0.09746142104268074,
"train_probe_rewards/frontier_coverage_15": 0.09746142104268074,
"train_probe_rewards/frontier_coverage_20": 0.08396613597869873,
"train_probe_rewards/frontier_coverage_25": 0.06902317516505718,
"train_probe_rewards/frontier_coverage_5": 0.09746142104268074,
"train_probe_rewards/frontier_ece_reward": 0.009781356435269117,
"train_probe_runtime": 9.0902,
"train_probe_samples_per_second": 55.004,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.434326171875,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4729345738887787,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2171630859375,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2171630859375,
"train_probe_signal/advantage_abs_mean": 0.19102784246206284,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.19102784246206284,
"train_probe_signal/advantage_pre_scale_std": 0.21111667901277542,
"train_probe_signal/advantage_std": 0.21111667901277542,
"train_probe_signal/brier_reward/centered_abs_mean": 0.1861182525753975,
"train_probe_signal/brier_reward/group_std_mean": 0.24657447636127472,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023264781571924686,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.023264781571924686,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.039306640625,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.046158455312252045,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004913330078125,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004913330078125,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0019272951176390052,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0035583705175668,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.449858195381239e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.449858195381239e-05,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3332698345184326,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.45197173953056335,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005965529475361109,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005965529475361109,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3332698345184326,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.45197173953056335,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005965529475361109,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005965529475361109,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3332698345184326,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.45197173953056335,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005965529475361109,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005965529475361109,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.256347618997097,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.35391244292259216,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0045886223670095205,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045886223670095205,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.1468118354678154,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.20914901793003082,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026279317680746317,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026279317680746317,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3332698345184326,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.45197173953056335,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005965529475361109,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005965529475361109,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.012552765663713217,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.014878344256430864,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001569095707964152,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001569095707964152,
"train_probe_steps_per_second": 0.22
},
{
"calibration/aurc": 0.24817290495089597,
"calibration/batch_distribution_entropy": 0.9204595927573562,
"calibration/buffer_distribution_entropy": 0.9519832870818661,
"calibration/confidence_entropy": 0.4577226375814408,
"calibration/coverage@0%": 0.025,
"calibration/coverage@1%": 0.1078125,
"calibration/coverage@10%": 0.3125,
"calibration/coverage@15%": 0.35234375,
"calibration/coverage@20%": 0.4484375,
"calibration/coverage@25%": 0.49609375,
"calibration/coverage@30%": 0.6015625,
"calibration/coverage@5%": 0.240625,
"calibration/ece": 0.1720056808846374,
"calibration/mean_confidence": 0.5100622878653626,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 656.8,
"completions/max_terminated_length": 496.6,
"completions/mean_length": 199.54921875,
"completions/mean_terminated_length": 199.4187744140625,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.656,
"grad_norm": 0.0016656734514981508,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 690517473.0,
"reward": 1.0183162331581115,
"reward_std": 0.06802579239010811,
"rewards/accuracy_reward": 0.57275390625,
"rewards/brier_reward": 0.8095171332359314,
"rewards/confidence_uniqueness_reward": 0.94515380859375,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0018861858639866113,
"rewards/frontier_coverage_1": 0.12252766788005828,
"rewards/frontier_coverage_10": 0.12252766788005828,
"rewards/frontier_coverage_15": 0.12252766788005828,
"rewards/frontier_coverage_20": 0.09871871173381805,
"rewards/frontier_coverage_25": 0.0694797769188881,
"rewards/frontier_coverage_5": 0.12252766788005828,
"rewards/frontier_ece_reward": 0.007234203815460205,
"signal/accuracy_reward/centered_abs_mean": 0.083831787109375,
"signal/accuracy_reward/group_std_mean": 0.11424745023250579,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0419158935546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0419158935546875,
"signal/advantage_abs_mean": 0.05140817314386368,
"signal/advantage_pre_scale_abs_mean": 0.05140817314386368,
"signal/advantage_pre_scale_std": 0.09806035608053207,
"signal/advantage_std": 0.09806035608053207,
"signal/brier_reward/centered_abs_mean": 0.1077189490199089,
"signal/brier_reward/group_std_mean": 0.13838136196136475,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013464868627488613,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013464868627488613,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421807125210762,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030718856677412986,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030272589065134525,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030272589065134525,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013030647998675704,
"signal/frontier_aurc_reward/group_std_mean": 0.0020229590591043234,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3324858921114357e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3324858921114357e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13521387726068496,
"signal/frontier_coverage_1/group_std_mean": 0.17738903760910035,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002420328464359045,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002420328464359045,
"signal/frontier_coverage_10/centered_abs_mean": 0.13521387726068496,
"signal/frontier_coverage_10/group_std_mean": 0.17738903760910035,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002420328464359045,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002420328464359045,
"signal/frontier_coverage_15/centered_abs_mean": 0.13521387726068496,
"signal/frontier_coverage_15/group_std_mean": 0.17738903760910035,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002420328464359045,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002420328464359045,
"signal/frontier_coverage_20/centered_abs_mean": 0.09841903001070022,
"signal/frontier_coverage_20/group_std_mean": 0.129475200176239,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017617005854845048,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017617005854845048,
"signal/frontier_coverage_25/centered_abs_mean": 0.06278965771198272,
"signal/frontier_coverage_25/group_std_mean": 0.08188406601548195,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011239348677918315,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011239348677918315,
"signal/frontier_coverage_5/centered_abs_mean": 0.13521387726068496,
"signal/frontier_coverage_5/group_std_mean": 0.17738903760910035,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002420328464359045,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002420328464359045,
"signal/frontier_ece_reward/centered_abs_mean": 0.006564310565590858,
"signal/frontier_ece_reward/group_std_mean": 0.008403288014233113,
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008205388206988573,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008205388206988573,
"step": 205
},
{
"calibration/aurc": 0.23136369662906525,
"calibration/batch_distribution_entropy": 0.8958516870167121,
"calibration/buffer_distribution_entropy": 0.9554491916207283,
"calibration/confidence_entropy": 0.43047974707442044,
"calibration/coverage@0%": 0.07734375,
"calibration/coverage@1%": 0.07734375,
"calibration/coverage@10%": 0.31953125,
"calibration/coverage@15%": 0.3921875,
"calibration/coverage@20%": 0.44375,
"calibration/coverage@25%": 0.5265625,
"calibration/coverage@30%": 0.60625,
"calibration/coverage@5%": 0.23203125,
"calibration/ece": 0.1621687781762295,
"calibration/mean_confidence": 0.5560812218237705,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 436.2,
"completions/max_terminated_length": 436.2,
"completions/mean_length": 204.09091796875,
"completions/mean_terminated_length": 204.09091796875,
"completions/min_length": 102.6,
"completions/min_terminated_length": 102.6,
"epoch": 0.672,
"grad_norm": 0.0015550514217466116,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 707520804.0,
"reward": 1.026635193824768,
"reward_std": 0.06110656931996346,
"rewards/accuracy_reward": 0.5787109375,
"rewards/brier_reward": 0.8387270212173462,
"rewards/confidence_uniqueness_reward": 0.9363265991210937,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.00141967604868114,
"rewards/frontier_coverage_1": 0.15640246868133545,
"rewards/frontier_coverage_10": 0.15640246868133545,
"rewards/frontier_coverage_15": 0.1499548703432083,
"rewards/frontier_coverage_20": 0.10669813752174377,
"rewards/frontier_coverage_25": 0.08252269625663758,
"rewards/frontier_coverage_5": 0.15640246868133545,
"rewards/frontier_ece_reward": 0.0076270273886621,
"signal/accuracy_reward/centered_abs_mean": 0.0878173828125,
"signal/accuracy_reward/group_std_mean": 0.1166835829615593,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04390869140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04390869140625,
"signal/advantage_abs_mean": 0.04624823108315468,
"signal/advantage_pre_scale_abs_mean": 0.04624823108315468,
"signal/advantage_pre_scale_std": 0.09073985815048217,
"signal/advantage_std": 0.09073985815048217,
"signal/brier_reward/centered_abs_mean": 0.1011570304632187,
"signal/brier_reward/group_std_mean": 0.13063574135303496,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012644628807902337,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012644628807902337,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028185939788818358,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03486784622073173,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035232424736022947,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035232424736022947,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00101193260634318,
"signal/frontier_aurc_reward/group_std_mean": 0.0015801386674866081,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8113592523150147e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8113592523150147e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14319152235984803,
"signal/frontier_coverage_1/group_std_mean": 0.18724198639392853,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025631281081587077,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025631281081587077,
"signal/frontier_coverage_10/centered_abs_mean": 0.14319152235984803,
"signal/frontier_coverage_10/group_std_mean": 0.18724198639392853,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025631281081587077,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025631281081587077,
"signal/frontier_coverage_15/centered_abs_mean": 0.13852950036525727,
"signal/frontier_coverage_15/group_std_mean": 0.18155628740787505,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024796778801828624,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024796778801828624,
"signal/frontier_coverage_20/centered_abs_mean": 0.08701228499412536,
"signal/frontier_coverage_20/group_std_mean": 0.11472053080797195,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00155751989223063,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00155751989223063,
"signal/frontier_coverage_25/centered_abs_mean": 0.05806139260530472,
"signal/frontier_coverage_25/group_std_mean": 0.07511216998100281,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010392988799139858,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010392988799139858,
"signal/frontier_coverage_5/centered_abs_mean": 0.14319152235984803,
"signal/frontier_coverage_5/group_std_mean": 0.18724198639392853,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025631281081587077,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025631281081587077,
"signal/frontier_ece_reward/centered_abs_mean": 0.005760752130299807,
"signal/frontier_ece_reward/group_std_mean": 0.007287882454693318,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007200940162874758,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007200940162874758,
"step": 210
},
{
"calibration/aurc": 0.2169351861265752,
"calibration/batch_distribution_entropy": 0.906085427880875,
"calibration/buffer_distribution_entropy": 0.95874229714237,
"calibration/confidence_entropy": 0.4680126493425715,
"calibration/coverage@0%": 0.09546875,
"calibration/coverage@1%": 0.09546875,
"calibration/coverage@10%": 0.4090379901960784,
"calibration/coverage@15%": 0.5012714460784313,
"calibration/coverage@20%": 0.5692738970588235,
"calibration/coverage@25%": 0.6333884803921569,
"calibration/coverage@30%": 0.7037837009803922,
"calibration/coverage@5%": 0.2745772058823529,
"calibration/ece": 0.1512060885389182,
"calibration/mean_confidence": 0.5770298367061799,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 668.2,
"completions/max_terminated_length": 486.8,
"completions/mean_length": 212.39873046875,
"completions/mean_terminated_length": 212.26973571777344,
"completions/min_length": 104.2,
"completions/min_terminated_length": 104.2,
"epoch": 0.688,
"grad_norm": 0.001888699596747756,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 724649687.0,
"reward": 1.0408958196640015,
"reward_std": 0.06385916396975518,
"rewards/accuracy_reward": 0.61318359375,
"rewards/brier_reward": 0.8376299142837524,
"rewards/confidence_uniqueness_reward": 0.9420908451080322,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.001473946124315262,
"rewards/frontier_coverage_1": 0.12142772302031517,
"rewards/frontier_coverage_10": 0.12142772302031517,
"rewards/frontier_coverage_15": 0.10840724855661392,
"rewards/frontier_coverage_20": 0.07632499039173127,
"rewards/frontier_coverage_25": 0.0743546724319458,
"rewards/frontier_coverage_5": 0.12142772302031517,
"rewards/frontier_ece_reward": 0.0060465382412076,
"signal/accuracy_reward/centered_abs_mean": 0.085443115234375,
"signal/accuracy_reward/group_std_mean": 0.11665472537279128,
"signal/accuracy_reward/group_zero_std_frac": 0.65,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0427215576171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0427215576171875,
"signal/advantage_abs_mean": 0.04668809846043587,
"signal/advantage_pre_scale_abs_mean": 0.04668809846043587,
"signal/advantage_pre_scale_std": 0.09376905411481858,
"signal/advantage_std": 0.09376905411481858,
"signal/brier_reward/centered_abs_mean": 0.09790285527706147,
"signal/brier_reward/group_std_mean": 0.1284557342529297,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012237856909632683,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012237856909632683,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02564612701535225,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03227175809442997,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003205765876919031,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003205765876919031,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011468618642538786,
"signal/frontier_aurc_reward/group_std_mean": 0.0019072068389505148,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0528827008092776e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0528827008092776e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13615999221801758,
"signal/frontier_coverage_1/group_std_mean": 0.1743500828742981,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024372637271881104,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024372637271881104,
"signal/frontier_coverage_10/centered_abs_mean": 0.13615999221801758,
"signal/frontier_coverage_10/group_std_mean": 0.1743500828742981,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024372637271881104,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024372637271881104,
"signal/frontier_coverage_15/centered_abs_mean": 0.11814617216587067,
"signal/frontier_coverage_15/group_std_mean": 0.15128694474697113,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002114816382527351,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002114816382527351,
"signal/frontier_coverage_20/centered_abs_mean": 0.07219749391078949,
"signal/frontier_coverage_20/group_std_mean": 0.09231588244438171,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012923351023346186,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012923351023346186,
"signal/frontier_coverage_25/centered_abs_mean": 0.05291619151830673,
"signal/frontier_coverage_25/group_std_mean": 0.06708001494407653,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009471998200751841,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009471998200751841,
"signal/frontier_coverage_5/centered_abs_mean": 0.13615999221801758,
"signal/frontier_coverage_5/group_std_mean": 0.1743500828742981,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024372637271881104,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024372637271881104,
"signal/frontier_ece_reward/centered_abs_mean": 0.004961969796568155,
"signal/frontier_ece_reward/group_std_mean": 0.00630278754979372,
"signal/frontier_ece_reward/group_zero_std_frac": 0.009375,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006202462245710194,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006202462245710194,
"step": 215
},
{
"calibration/aurc": 0.11948550052681228,
"calibration/batch_distribution_entropy": 0.7879994954219934,
"calibration/buffer_distribution_entropy": 0.9586850802270618,
"calibration/confidence_entropy": 0.3649859275848222,
"calibration/coverage@0%": 0.265625,
"calibration/coverage@1%": 0.26796875,
"calibration/coverage@10%": 0.496875,
"calibration/coverage@15%": 0.634375,
"calibration/coverage@20%": 0.78203125,
"calibration/coverage@25%": 0.83515625,
"calibration/coverage@30%": 0.8984375,
"calibration/coverage@5%": 0.3890625,
"calibration/ece": 0.13410406490920107,
"calibration/mean_confidence": 0.6672344767574656,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 652.2,
"completions/max_terminated_length": 434.8,
"completions/mean_length": 211.9447265625,
"completions/mean_terminated_length": 211.81568603515626,
"completions/min_length": 103.6,
"completions/min_terminated_length": 103.6,
"epoch": 0.704,
"grad_norm": 0.0016022155759856105,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 741686145.0,
"reward": 1.0412675619125367,
"reward_std": 0.060601814091205596,
"rewards/accuracy_reward": 0.61259765625,
"rewards/brier_reward": 0.8442665100097656,
"rewards/confidence_uniqueness_reward": 0.937886118888855,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0017726486083120107,
"rewards/frontier_coverage_1": 0.12525941878557206,
"rewards/frontier_coverage_10": 0.12525941878557206,
"rewards/frontier_coverage_15": 0.10406550467014312,
"rewards/frontier_coverage_20": 0.07677052170038223,
"rewards/frontier_coverage_25": 0.09108839333057403,
"rewards/frontier_coverage_5": 0.12525941878557206,
"rewards/frontier_ece_reward": 0.005490910448133946,
"signal/accuracy_reward/centered_abs_mean": 0.074908447265625,
"signal/accuracy_reward/group_std_mean": 0.09916009157896041,
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0374542236328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0374542236328125,
"signal/advantage_abs_mean": 0.04598864167928696,
"signal/advantage_pre_scale_abs_mean": 0.04598864167928696,
"signal/advantage_pre_scale_std": 0.09303600341081619,
"signal/advantage_std": 0.09303600341081619,
"signal/brier_reward/centered_abs_mean": 0.09707934856414795,
"signal/brier_reward/group_std_mean": 0.12730673998594283,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012134918570518493,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012134918570518493,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028879277408123016,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03636922165751457,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003609909676015377,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003609909676015377,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014877181965857744,
"signal/frontier_aurc_reward/group_std_mean": 0.0023924733977764845,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6630155844031833e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6630155844031833e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1195068359375,
"signal/frontier_coverage_1/group_std_mean": 0.15731086134910582,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002139172307215631,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002139172307215631,
"signal/frontier_coverage_10/centered_abs_mean": 0.1195068359375,
"signal/frontier_coverage_10/group_std_mean": 0.15731086134910582,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002139172307215631,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002139172307215631,
"signal/frontier_coverage_15/centered_abs_mean": 0.09091014117002487,
"signal/frontier_coverage_15/group_std_mean": 0.1199584573507309,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016272914595901965,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016272914595901965,
"signal/frontier_coverage_20/centered_abs_mean": 0.05923491641879082,
"signal/frontier_coverage_20/group_std_mean": 0.0771937534213066,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010603050119243561,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010603050119243561,
"signal/frontier_coverage_25/centered_abs_mean": 0.053304193913936614,
"signal/frontier_coverage_25/group_std_mean": 0.0676953986287117,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009541450766846537,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009541450766846537,
"signal/frontier_coverage_5/centered_abs_mean": 0.1195068359375,
"signal/frontier_coverage_5/group_std_mean": 0.15731086134910582,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002139172307215631,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002139172307215631,
"signal/frontier_ece_reward/centered_abs_mean": 0.004334048368036747,
"signal/frontier_ece_reward/group_std_mean": 0.0055789993144571785,
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005417560460045934,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005417560460045934,
"step": 220
},
{
"calibration/aurc": 0.14470123646878835,
"calibration/batch_distribution_entropy": 0.8739225583129173,
"calibration/buffer_distribution_entropy": 0.9555200094663506,
"calibration/confidence_entropy": 0.391513768008919,
"calibration/coverage@0%": 0.0875,
"calibration/coverage@1%": 0.0875,
"calibration/coverage@10%": 0.5484375,
"calibration/coverage@15%": 0.675,
"calibration/coverage@20%": 0.72890625,
"calibration/coverage@25%": 0.77265625,
"calibration/coverage@30%": 0.81953125,
"calibration/coverage@5%": 0.31953125,
"calibration/ece": 0.15345659905795017,
"calibration/mean_confidence": 0.6121585571920498,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 562.2,
"completions/max_terminated_length": 562.2,
"completions/mean_length": 214.69287109375,
"completions/mean_terminated_length": 214.69287109375,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.72,
"grad_norm": 0.0017858616774901748,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 758894456.0,
"reward": 1.0460729122161865,
"reward_std": 0.0672881230711937,
"rewards/accuracy_reward": 0.62294921875,
"rewards/brier_reward": 0.8469637155532836,
"rewards/confidence_uniqueness_reward": 0.935894775390625,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.001939373160712421,
"rewards/frontier_coverage_1": 0.11669339537620545,
"rewards/frontier_coverage_10": 0.11649550646543502,
"rewards/frontier_coverage_15": 0.09009001255035401,
"rewards/frontier_coverage_20": 0.07372135147452355,
"rewards/frontier_coverage_25": 0.1068428099155426,
"rewards/frontier_coverage_5": 0.11669339537620545,
"rewards/frontier_ece_reward": 0.005344946216791868,
"signal/accuracy_reward/centered_abs_mean": 0.082562255859375,
"signal/accuracy_reward/group_std_mean": 0.1146527960896492,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0412811279296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0412811279296875,
"signal/advantage_abs_mean": 0.04909345507621765,
"signal/advantage_pre_scale_abs_mean": 0.04909345507621765,
"signal/advantage_pre_scale_std": 0.10091503411531448,
"signal/advantage_std": 0.10091503411531448,
"signal/brier_reward/centered_abs_mean": 0.0935791552066803,
"signal/brier_reward/group_std_mean": 0.12280905842781067,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011697394400835037,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011697394400835037,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02970266342163086,
"signal/confidence_uniqueness_reward/group_std_mean": 0.037385367602109906,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037128329277038574,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037128329277038574,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017587365116924047,
"signal/frontier_aurc_reward/group_std_mean": 0.0027441283222287894,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1481383120990356e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1481383120990356e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.10815362930297852,
"signal/frontier_coverage_1/group_std_mean": 0.14316221177577973,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019359499448910356,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019359499448910356,
"signal/frontier_coverage_10/centered_abs_mean": 0.10738050639629364,
"signal/frontier_coverage_10/group_std_mean": 0.1421646863222122,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019221110735088587,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019221110735088587,
"signal/frontier_coverage_15/centered_abs_mean": 0.0754195511341095,
"signal/frontier_coverage_15/group_std_mean": 0.10019035190343857,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013500099536031484,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013500099536031484,
"signal/frontier_coverage_20/centered_abs_mean": 0.052413633465766905,
"signal/frontier_coverage_20/group_std_mean": 0.0678664654493332,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009382039890624583,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009382039890624583,
"signal/frontier_coverage_25/centered_abs_mean": 0.05627275034785271,
"signal/frontier_coverage_25/group_std_mean": 0.07161930799484253,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010072821867652237,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010072821867652237,
"signal/frontier_coverage_5/centered_abs_mean": 0.10815362930297852,
"signal/frontier_coverage_5/group_std_mean": 0.14316221177577973,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019359499448910356,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019359499448910356,
"signal/frontier_ece_reward/centered_abs_mean": 0.0037975626531988383,
"signal/frontier_ece_reward/group_std_mean": 0.004970707837492228,
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004746953316498548,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004746953316498548,
"step": 225
},
{
"calibration/aurc": 0.10276625605061121,
"calibration/batch_distribution_entropy": 0.7755610517250693,
"calibration/buffer_distribution_entropy": 0.9506196997878165,
"calibration/confidence_entropy": 0.3668382010462838,
"calibration/coverage@0%": 0.16171875,
"calibration/coverage@1%": 0.171875,
"calibration/coverage@10%": 0.60703125,
"calibration/coverage@15%": 0.70390625,
"calibration/coverage@20%": 0.83125,
"calibration/coverage@25%": 0.925,
"calibration/coverage@30%": 0.94921875,
"calibration/coverage@5%": 0.48046875,
"calibration/ece": 0.09331710770831818,
"calibration/mean_confidence": 0.7038330452083182,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 523.8,
"completions/max_terminated_length": 523.8,
"completions/mean_length": 220.8560546875,
"completions/mean_terminated_length": 220.8560546875,
"completions/min_length": 100.4,
"completions/min_terminated_length": 100.4,
"epoch": 0.736,
"grad_norm": 0.002271299483254552,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 776095606.0,
"reward": 1.0492503643035889,
"reward_std": 0.06152931973338127,
"rewards/accuracy_reward": 0.6287109375,
"rewards/brier_reward": 0.8473744511604309,
"rewards/confidence_uniqueness_reward": 0.9358478307723999,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0017740631010383368,
"rewards/frontier_coverage_1": 0.11950143873691559,
"rewards/frontier_coverage_10": 0.11690339148044586,
"rewards/frontier_coverage_15": 0.09336267858743667,
"rewards/frontier_coverage_20": 0.0775704950094223,
"rewards/frontier_coverage_25": 0.11357748061418534,
"rewards/frontier_coverage_5": 0.11950143873691559,
"rewards/frontier_ece_reward": 0.004873855458572507,
"signal/accuracy_reward/centered_abs_mean": 0.0777587890625,
"signal/accuracy_reward/group_std_mean": 0.10373825207352638,
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03887939453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03887939453125,
"signal/advantage_abs_mean": 0.046919054538011554,
"signal/advantage_pre_scale_abs_mean": 0.046919054538011554,
"signal/advantage_pre_scale_std": 0.0959189236164093,
"signal/advantage_std": 0.0959189236164093,
"signal/brier_reward/centered_abs_mean": 0.0924751952290535,
"signal/brier_reward/group_std_mean": 0.1207397997379303,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011559399403631687,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011559399403631687,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.029533731937408447,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0374361515045166,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003691716492176056,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003691716492176056,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001516377995721996,
"signal/frontier_aurc_reward/group_std_mean": 0.002370060421526432,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.714316433412023e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.714316433412023e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11420103460550309,
"signal/frontier_coverage_1/group_std_mean": 0.15096487402915953,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020441983826458452,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020441983826458452,
"signal/frontier_coverage_10/centered_abs_mean": 0.11071749776601791,
"signal/frontier_coverage_10/group_std_mean": 0.14640629887580872,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019818432396277786,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019818432396277786,
"signal/frontier_coverage_15/centered_abs_mean": 0.0779910683631897,
"signal/frontier_coverage_15/group_std_mean": 0.1036263257265091,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013960400596261025,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013960400596261025,
"signal/frontier_coverage_20/centered_abs_mean": 0.05327008962631226,
"signal/frontier_coverage_20/group_std_mean": 0.06925814524292946,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009535345481708646,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009535345481708646,
"signal/frontier_coverage_25/centered_abs_mean": 0.05671848207712173,
"signal/frontier_coverage_25/group_std_mean": 0.07240066826343536,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001015260792337358,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001015260792337358,
"signal/frontier_coverage_5/centered_abs_mean": 0.11420103460550309,
"signal/frontier_coverage_5/group_std_mean": 0.15096487402915953,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020441983826458452,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020441983826458452,
"signal/frontier_ece_reward/centered_abs_mean": 0.0037134474609047175,
"signal/frontier_ece_reward/group_std_mean": 0.004857833497226238,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004641809326130897,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004641809326130897,
"step": 230
},
{
"calibration/aurc": 0.15163492450002442,
"calibration/batch_distribution_entropy": 0.8904229891990763,
"calibration/buffer_distribution_entropy": 0.9448809655937878,
"calibration/confidence_entropy": 0.3949929669354096,
"calibration/coverage@0%": 0.07835171568627451,
"calibration/coverage@1%": 0.14788296568627451,
"calibration/coverage@10%": 0.5339736519607843,
"calibration/coverage@15%": 0.626219362745098,
"calibration/coverage@20%": 0.7043719362745098,
"calibration/coverage@25%": 0.7840992647058823,
"calibration/coverage@30%": 0.8411642156862745,
"calibration/coverage@5%": 0.3206341911764706,
"calibration/ece": 0.13040471542112503,
"calibration/mean_confidence": 0.5816119304946545,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 682.0,
"completions/max_terminated_length": 494.4,
"completions/mean_length": 218.71201171875,
"completions/mean_terminated_length": 218.4533935546875,
"completions/min_length": 112.0,
"completions/min_terminated_length": 112.0,
"epoch": 0.752,
"grad_norm": 0.001796262338757515,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 793562417.0,
"reward": 1.0391303777694703,
"reward_std": 0.06580677628517151,
"rewards/accuracy_reward": 0.61123046875,
"rewards/brier_reward": 0.8342607021331787,
"rewards/confidence_uniqueness_reward": 0.9415198564529419,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0019529166864231228,
"rewards/frontier_coverage_1": 0.1229474276304245,
"rewards/frontier_coverage_10": 0.1206100896000862,
"rewards/frontier_coverage_15": 0.09027208015322685,
"rewards/frontier_coverage_20": 0.07027497664093971,
"rewards/frontier_coverage_25": 0.09396415501832962,
"rewards/frontier_coverage_5": 0.1229474276304245,
"rewards/frontier_ece_reward": 0.004472200945019722,
"signal/accuracy_reward/centered_abs_mean": 0.080157470703125,
"signal/accuracy_reward/group_std_mean": 0.10887984037399293,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400787353515625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0400787353515625,
"signal/advantage_abs_mean": 0.04903002083301544,
"signal/advantage_pre_scale_abs_mean": 0.04903002083301544,
"signal/advantage_pre_scale_std": 0.1000540629029274,
"signal/advantage_std": 0.1000540629029274,
"signal/brier_reward/centered_abs_mean": 0.09556291699409485,
"signal/brier_reward/group_std_mean": 0.124751777946949,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011945364624261856,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011945364624261856,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026840757578611374,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033848896622657776,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033550946973264217,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033550946973264217,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001602224470116198,
"signal/frontier_aurc_reward/group_std_mean": 0.002494157268665731,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8679816023213788e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8679816023213788e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11630584448575973,
"signal/frontier_coverage_1/group_std_mean": 0.15271863341331482,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002081874618306756,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002081874618306756,
"signal/frontier_coverage_10/centered_abs_mean": 0.11303210407495498,
"signal/frontier_coverage_10/group_std_mean": 0.14861542731523514,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002023274498060346,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002023274498060346,
"signal/frontier_coverage_15/centered_abs_mean": 0.07724076434969902,
"signal/frontier_coverage_15/group_std_mean": 0.10189598947763442,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013826095964759588,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013826095964759588,
"signal/frontier_coverage_20/centered_abs_mean": 0.05278810262680054,
"signal/frontier_coverage_20/group_std_mean": 0.06799793317914009,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009449069970287382,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009449069970287382,
"signal/frontier_coverage_25/centered_abs_mean": 0.05864310711622238,
"signal/frontier_coverage_25/group_std_mean": 0.0744215801358223,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010497116250917315,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010497116250917315,
"signal/frontier_coverage_5/centered_abs_mean": 0.11630584448575973,
"signal/frontier_coverage_5/group_std_mean": 0.15271863341331482,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002081874618306756,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002081874618306756,
"signal/frontier_ece_reward/centered_abs_mean": 0.0036135178990662096,
"signal/frontier_ece_reward/group_std_mean": 0.0046929454430937765,
"signal/frontier_ece_reward/group_zero_std_frac": 0.015625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004516897373832762,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004516897373832762,
"step": 235
},
{
"calibration/aurc": 0.1354460653145263,
"calibration/batch_distribution_entropy": 0.908943110934102,
"calibration/buffer_distribution_entropy": 0.940215096807232,
"calibration/confidence_entropy": 0.4264768475786309,
"calibration/coverage@0%": 0.24072610294117647,
"calibration/coverage@1%": 0.2852573529411765,
"calibration/coverage@10%": 0.5294638480392158,
"calibration/coverage@15%": 0.63828125,
"calibration/coverage@20%": 0.7453125,
"calibration/coverage@25%": 0.81015625,
"calibration/coverage@30%": 0.871875,
"calibration/coverage@5%": 0.44806985294117646,
"calibration/ece": 0.16837747358087166,
"calibration/mean_confidence": 0.5540928773798913,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1317.8,
"completions/max_terminated_length": 464.0,
"completions/mean_length": 222.9625,
"completions/mean_terminated_length": 222.44960021972656,
"completions/min_length": 104.6,
"completions/min_terminated_length": 104.6,
"epoch": 0.768,
"grad_norm": 0.0016375478589907289,
"learning_rate": 1e-06,
"loss": 0.0011,
"num_tokens": 810778257.0,
"reward": 1.0248207330703736,
"reward_std": 0.061895406991243365,
"rewards/accuracy_reward": 0.576953125,
"rewards/brier_reward": 0.8413738012313843,
"rewards/confidence_uniqueness_reward": 0.9406145691871644,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.001507855043746531,
"rewards/frontier_coverage_1": 0.1511134535074234,
"rewards/frontier_coverage_10": 0.14939744472503663,
"rewards/frontier_coverage_15": 0.11022275984287262,
"rewards/frontier_coverage_20": 0.08167696744203568,
"rewards/frontier_coverage_25": 0.09813316464424134,
"rewards/frontier_coverage_5": 0.1511134535074234,
"rewards/frontier_ece_reward": 0.00472887079231441,
"signal/accuracy_reward/centered_abs_mean": 0.07352294921875,
"signal/accuracy_reward/group_std_mean": 0.09980905205011367,
"signal/accuracy_reward/group_zero_std_frac": 0.709375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.036761474609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.036761474609375,
"signal/advantage_abs_mean": 0.045818436145782473,
"signal/advantage_pre_scale_abs_mean": 0.045818436145782473,
"signal/advantage_pre_scale_std": 0.0930859088897705,
"signal/advantage_std": 0.0930859088897705,
"signal/brier_reward/centered_abs_mean": 0.09857990890741349,
"signal/brier_reward/group_std_mean": 0.13005568087100983,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012322488613426686,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012322488613426686,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026614753901958464,
"signal/confidence_uniqueness_reward/group_std_mean": 0.034846174716949466,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003326844237744808,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003326844237744808,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0011487239389680326,
"signal/frontier_aurc_reward/group_std_mean": 0.0017788737313821912,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0562157442327588e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0562157442327588e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1333732545375824,
"signal/frontier_coverage_1/group_std_mean": 0.17638799846172332,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023873811587691307,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023873811587691307,
"signal/frontier_coverage_10/centered_abs_mean": 0.13058110177516938,
"signal/frontier_coverage_10/group_std_mean": 0.17269828617572786,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023374016396701335,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023374016396701335,
"signal/frontier_coverage_15/centered_abs_mean": 0.08957252502441407,
"signal/frontier_coverage_15/group_std_mean": 0.11884426325559616,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016033481108024717,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016033481108024717,
"signal/frontier_coverage_20/centered_abs_mean": 0.06021819338202476,
"signal/frontier_coverage_20/group_std_mean": 0.0782925844192505,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010779056698083877,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010779056698083877,
"signal/frontier_coverage_25/centered_abs_mean": 0.057765302062034604,
"signal/frontier_coverage_25/group_std_mean": 0.07384001463651657,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001033998851198703,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001033998851198703,
"signal/frontier_coverage_5/centered_abs_mean": 0.1333732545375824,
"signal/frontier_coverage_5/group_std_mean": 0.17638799846172332,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023873811587691307,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023873811587691307,
"signal/frontier_ece_reward/centered_abs_mean": 0.0036720467731356623,
"signal/frontier_ece_reward/group_std_mean": 0.004740559495985508,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004590058466419578,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004590058466419578,
"step": 240
},
{
"calibration/aurc": 0.1780989063016482,
"calibration/batch_distribution_entropy": 0.8732530039163613,
"calibration/buffer_distribution_entropy": 0.9368918455830914,
"calibration/confidence_entropy": 0.37058566571107476,
"calibration/coverage@0%": 0.16484375,
"calibration/coverage@1%": 0.1953125,
"calibration/coverage@10%": 0.44375,
"calibration/coverage@15%": 0.503125,
"calibration/coverage@20%": 0.5875,
"calibration/coverage@25%": 0.71484375,
"calibration/coverage@30%": 0.7625,
"calibration/coverage@5%": 0.315625,
"calibration/ece": 0.10408663335191717,
"calibration/mean_confidence": 0.5442599486038305,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 693.4,
"completions/max_terminated_length": 591.2,
"completions/mean_length": 223.282421875,
"completions/mean_terminated_length": 223.15485534667968,
"completions/min_length": 110.4,
"completions/min_terminated_length": 110.4,
"epoch": 0.784,
"grad_norm": 0.0017808079719543457,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 828239037.0,
"reward": 1.0469671964645386,
"reward_std": 0.0657818466424942,
"rewards/accuracy_reward": 0.63125,
"rewards/brier_reward": 0.8252038955688477,
"rewards/confidence_uniqueness_reward": 0.9421940207481384,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0015878735110163688,
"rewards/frontier_coverage_1": 0.10298990458250046,
"rewards/frontier_coverage_10": 0.10325277298688888,
"rewards/frontier_coverage_15": 0.08138184025883674,
"rewards/frontier_coverage_20": 0.06994581818580628,
"rewards/frontier_coverage_25": 0.10279036164283753,
"rewards/frontier_coverage_5": 0.10298990458250046,
"rewards/frontier_ece_reward": 0.0036763294599950315,
"signal/accuracy_reward/centered_abs_mean": 0.08505859375,
"signal/accuracy_reward/group_std_mean": 0.11480707228183747,
"signal/accuracy_reward/group_zero_std_frac": 0.665625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042529296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.042529296875,
"signal/advantage_abs_mean": 0.04915754199028015,
"signal/advantage_pre_scale_abs_mean": 0.04915754199028015,
"signal/advantage_pre_scale_std": 0.09866253137588502,
"signal/advantage_std": 0.09866253137588502,
"signal/brier_reward/centered_abs_mean": 0.1016099825501442,
"signal/brier_reward/group_std_mean": 0.13323958665132524,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012701247818768024,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012701247818768024,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02635921761393547,
"signal/confidence_uniqueness_reward/group_std_mean": 0.033456063643097875,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032949022017419336,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032949022017419336,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001376147347036749,
"signal/frontier_aurc_reward/group_std_mean": 0.002203846746124327,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.463303608237766e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.463303608237766e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13170208930969238,
"signal/frontier_coverage_1/group_std_mean": 0.1737958937883377,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023574673570692537,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023574673570692537,
"signal/frontier_coverage_10/centered_abs_mean": 0.12848464101552964,
"signal/frontier_coverage_10/group_std_mean": 0.16970953047275544,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022998749278485774,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022998749278485774,
"signal/frontier_coverage_15/centered_abs_mean": 0.08622983396053314,
"signal/frontier_coverage_15/group_std_mean": 0.11427305340766906,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015435139182955026,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015435139182955026,
"signal/frontier_coverage_20/centered_abs_mean": 0.05881091207265854,
"signal/frontier_coverage_20/group_std_mean": 0.0768322467803955,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010527152917347848,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010527152917347848,
"signal/frontier_coverage_25/centered_abs_mean": 0.05912056043744087,
"signal/frontier_coverage_25/group_std_mean": 0.07621604949235916,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010582579649053513,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010582579649053513,
"signal/frontier_coverage_5/centered_abs_mean": 0.13170208930969238,
"signal/frontier_coverage_5/group_std_mean": 0.1737958937883377,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023574673570692537,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023574673570692537,
"signal/frontier_ece_reward/centered_abs_mean": 0.003539442550390959,
"signal/frontier_ece_reward/group_std_mean": 0.004590986762195826,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044243031879886985,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044243031879886985,
"step": 245
},
{
"calibration/aurc": 0.26110712774742,
"calibration/batch_distribution_entropy": 0.8637357131440693,
"calibration/buffer_distribution_entropy": 0.9338101197498352,
"calibration/confidence_entropy": 0.41911873533648614,
"calibration/coverage@0%": 0.025,
"calibration/coverage@1%": 0.025,
"calibration/coverage@10%": 0.30546875,
"calibration/coverage@15%": 0.34453125,
"calibration/coverage@20%": 0.54609375,
"calibration/coverage@25%": 0.5859375,
"calibration/coverage@30%": 0.63671875,
"calibration/coverage@5%": 0.078125,
"calibration/ece": 0.1646667575702236,
"calibration/mean_confidence": 0.5018031308830292,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 694.4,
"completions/max_terminated_length": 509.2,
"completions/mean_length": 220.36611328125,
"completions/mean_terminated_length": 220.2374237060547,
"completions/min_length": 105.8,
"completions/min_terminated_length": 105.8,
"epoch": 0.8,
"grad_norm": 0.002036831108853221,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 845506146.0,
"reward": 1.0645583629608155,
"reward_std": 0.06164888888597488,
"rewards/accuracy_reward": 0.6568359375,
"rewards/brier_reward": 0.8579505681991577,
"rewards/confidence_uniqueness_reward": 0.9386770725250244,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.001275635720230639,
"rewards/frontier_coverage_1": 0.11075811237096786,
"rewards/frontier_coverage_10": 0.10875446647405625,
"rewards/frontier_coverage_15": 0.08313208520412445,
"rewards/frontier_coverage_20": 0.07641463130712509,
"rewards/frontier_coverage_25": 0.13115044236183165,
"rewards/frontier_coverage_5": 0.11075811237096786,
"rewards/frontier_ece_reward": 0.004146079532802105,
"signal/accuracy_reward/centered_abs_mean": 0.0808837890625,
"signal/accuracy_reward/group_std_mean": 0.10505216717720031,
"signal/accuracy_reward/group_zero_std_frac": 0.703125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04044189453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04044189453125,
"signal/advantage_abs_mean": 0.04755032882094383,
"signal/advantage_pre_scale_abs_mean": 0.04755032882094383,
"signal/advantage_pre_scale_std": 0.09882079064846039,
"signal/advantage_std": 0.09882079064846039,
"signal/brier_reward/centered_abs_mean": 0.08948185741901397,
"signal/brier_reward/group_std_mean": 0.1173609048128128,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011185232177376747,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011185232177376747,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.028289894759654998,
"signal/confidence_uniqueness_reward/group_std_mean": 0.035136304795742035,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035362368449568748,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035362368449568748,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013092580833472312,
"signal/frontier_aurc_reward/group_std_mean": 0.0021080786129459737,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.343571886740392e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.343571886740392e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11353515535593033,
"signal/frontier_coverage_1/group_std_mean": 0.15011467039585114,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020322792232036592,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020322792232036592,
"signal/frontier_coverage_10/centered_abs_mean": 0.10932374000549316,
"signal/frontier_coverage_10/group_std_mean": 0.14465901702642442,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001956894900649786,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001956894900649786,
"signal/frontier_coverage_15/centered_abs_mean": 0.07167089506983756,
"signal/frontier_coverage_15/group_std_mean": 0.0952614426612854,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001282908977009356,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001282908977009356,
"signal/frontier_coverage_20/centered_abs_mean": 0.05188070461153984,
"signal/frontier_coverage_20/group_std_mean": 0.0674702912569046,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009286645916290581,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009286645916290581,
"signal/frontier_coverage_25/centered_abs_mean": 0.06000246405601502,
"signal/frontier_coverage_25/group_std_mean": 0.0765003427863121,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010740441037341952,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010740441037341952,
"signal/frontier_coverage_5/centered_abs_mean": 0.11353515535593033,
"signal/frontier_coverage_5/group_std_mean": 0.15011467039585114,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020322792232036592,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020322792232036592,
"signal/frontier_ece_reward/centered_abs_mean": 0.0031503901816904547,
"signal/frontier_ece_reward/group_std_mean": 0.004117331793531775,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00039379877271130683,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00039379877271130683,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.4328469329018436,
"eval_calibration/batch_distribution_entropy": 0.9017827126146071,
"eval_calibration/buffer_distribution_entropy": 0.933450971645078,
"eval_calibration/confidence_entropy": 0.4392779969401228,
"eval_calibration/coverage@0%": 0.0625,
"eval_calibration/coverage@1%": 0.0625,
"eval_calibration/coverage@10%": 0.0625,
"eval_calibration/coverage@15%": 0.0625,
"eval_calibration/coverage@20%": 0.25,
"eval_calibration/coverage@25%": 0.3125,
"eval_calibration/coverage@30%": 0.34375,
"eval_calibration/coverage@5%": 0.0625,
"eval_calibration/ece": 0.214326634140625,
"eval_calibration/mean_confidence": 0.5412016341406249,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 416.0,
"eval_completions/max_terminated_length": 416.0,
"eval_completions/mean_length": 219.94239044189453,
"eval_completions/mean_terminated_length": 219.94239044189453,
"eval_completions/min_length": 115.5,
"eval_completions/min_terminated_length": 115.5,
"eval_loss": 0.0,
"eval_num_tokens": 845506146.0,
"eval_reward": 0.9467860460281372,
"eval_reward_std": 0.24670489132404327,
"eval_rewards/accuracy_reward": 0.44140625,
"eval_rewards/brier_reward": 0.7873809337615967,
"eval_rewards/confidence_uniqueness_reward": 0.890869140625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.004038012819364667,
"eval_rewards/frontier_coverage_1": 0.20367811620235443,
"eval_rewards/frontier_coverage_10": 0.19325406849384308,
"eval_rewards/frontier_coverage_15": 0.12953777611255646,
"eval_rewards/frontier_coverage_20": 0.08314738422632217,
"eval_rewards/frontier_coverage_25": 0.06818825379014015,
"eval_rewards/frontier_coverage_5": 0.20367811620235443,
"eval_rewards/frontier_ece_reward": 0.004763010889291763,
"eval_runtime": 10.7531,
"eval_samples_per_second": 46.498,
"eval_signal/accuracy_reward/centered_abs_mean": 0.473388671875,
"eval_signal/accuracy_reward/group_std_mean": 0.4939229190349579,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2366943359375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2366943359375,
"eval_signal/advantage_abs_mean": 0.22974882274866104,
"eval_signal/advantage_pre_scale_abs_mean": 0.22974882274866104,
"eval_signal/advantage_pre_scale_std": 0.2435239553451538,
"eval_signal/advantage_std": 0.2435239553451538,
"eval_signal/brier_reward/centered_abs_mean": 0.23106026649475098,
"eval_signal/brier_reward/group_std_mean": 0.2880419045686722,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028882533311843872,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.028882533311843872,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0494232177734375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.058502499014139175,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0061779022216796875,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0061779022216796875,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00543490145355463,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.010733058210462332,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.728474105941132e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.728474105941132e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.34148095548152924,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4213644117116928,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006112508941441774,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006112508941441774,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3236210346221924,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4000513255596161,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005792815936729312,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005792815936729312,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.20580045133829117,
"eval_signal/frontier_coverage_15/group_std_mean": 0.2604397386312485,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036838280502706766,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036838280502706766,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.11519244313240051,
"eval_signal/frontier_coverage_20/group_std_mean": 0.1483083888888359,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020619446877390146,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020619446877390146,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.1481623351573944,
"eval_signal/frontier_coverage_25/group_std_mean": 0.19164805114269257,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00265210575889796,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00265210575889796,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.34148095548152924,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4213644117116928,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006112508941441774,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006112508941441774,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006984395207837224,
"eval_signal/frontier_ece_reward/group_std_mean": 0.008835344575345516,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000873049400979653,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000873049400979653,
"eval_steps_per_second": 0.186,
"step": 250
},
{
"epoch": 0.8,
"step": 250,
"train_probe_calibration/aurc": 0.12986536721544725,
"train_probe_calibration/batch_distribution_entropy": 0.811839844274961,
"train_probe_calibration/buffer_distribution_entropy": 0.933494045269648,
"train_probe_calibration/confidence_entropy": 0.354105295763641,
"train_probe_calibration/coverage@0%": 0.140625,
"train_probe_calibration/coverage@1%": 0.140625,
"train_probe_calibration/coverage@10%": 0.609375,
"train_probe_calibration/coverage@15%": 0.765625,
"train_probe_calibration/coverage@20%": 0.828125,
"train_probe_calibration/coverage@25%": 0.90625,
"train_probe_calibration/coverage@30%": 0.921875,
"train_probe_calibration/coverage@5%": 0.484375,
"train_probe_calibration/ece": 0.13531250000000003,
"train_probe_calibration/mean_confidence": 0.624875,
"train_probe_completions/clipped_ratio": 0.0,
"train_probe_completions/max_length": 376.0,
"train_probe_completions/max_terminated_length": 376.0,
"train_probe_completions/mean_length": 217.32789611816406,
"train_probe_completions/mean_terminated_length": 217.32789611816406,
"train_probe_completions/min_length": 120.5,
"train_probe_completions/min_terminated_length": 120.5,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 845506146.0,
"train_probe_reward": 1.0561645030975342,
"train_probe_reward_std": 0.2325623854994774,
"train_probe_rewards/accuracy_reward": 0.654296875,
"train_probe_rewards/brier_reward": 0.8493243455886841,
"train_probe_rewards/confidence_uniqueness_reward": 0.889404296875,
"train_probe_rewards/format_reward": 1.0,
"train_probe_rewards/frontier_aurc_reward": -0.001908503647428006,
"train_probe_rewards/frontier_coverage_1": 0.11155515164136887,
"train_probe_rewards/frontier_coverage_10": 0.10592306032776833,
"train_probe_rewards/frontier_coverage_15": 0.08040037006139755,
"train_probe_rewards/frontier_coverage_20": 0.0775928758084774,
"train_probe_rewards/frontier_coverage_25": 0.1379874050617218,
"train_probe_rewards/frontier_coverage_5": 0.11155515164136887,
"train_probe_rewards/frontier_ece_reward": 0.004171320935711265,
"train_probe_runtime": 10.1866,
"train_probe_samples_per_second": 49.084,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4449462890625,
"train_probe_signal/accuracy_reward/group_std_mean": 0.4788653701543808,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22247314453125,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22247314453125,
"train_probe_signal/advantage_abs_mean": 0.21122215688228607,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.21122215688228607,
"train_probe_signal/advantage_pre_scale_std": 0.22976724058389664,
"train_probe_signal/advantage_std": 0.22976724058389664,
"train_probe_signal/brier_reward/centered_abs_mean": 0.18194539844989777,
"train_probe_signal/brier_reward/group_std_mean": 0.245933398604393,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02274317480623722,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.02274317480623722,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0509185791015625,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.061591994017362595,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063648223876953125,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063648223876953125,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.003361418261192739,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.006412317277863622,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.016938641550951e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.016938641550951e-05,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3020322024822235,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.4129178822040558,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0054063762072473764,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0054063762072473764,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.2831447720527649,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.3897576928138733,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0050682914443314075,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0050682914443314075,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1778106540441513,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.2535991668701172,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031828106148168445,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031828106148168445,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1011722981929779,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.14043454825878143,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018109841039404273,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018109841039404273,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.14492832124233246,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.17802315205335617,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002594216726720333,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002594216726720333,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3020322024822235,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.4129178822040558,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0054063762072473764,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0054063762072473764,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.006162431091070175,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.008223664714023471,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007703038863837719,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007703038863837719,
"train_probe_steps_per_second": 0.196
},
{
"calibration/aurc": 0.2613374077664524,
"calibration/batch_distribution_entropy": 0.8562545855944862,
"calibration/buffer_distribution_entropy": 0.9332085807167052,
"calibration/confidence_entropy": 0.36503547135583225,
"calibration/coverage@0%": 0.0875,
"calibration/coverage@1%": 0.0921875,
"calibration/coverage@10%": 0.27109375,
"calibration/coverage@15%": 0.346875,
"calibration/coverage@20%": 0.40078125,
"calibration/coverage@25%": 0.44921875,
"calibration/coverage@30%": 0.6296875,
"calibration/coverage@5%": 0.17265625,
"calibration/ece": 0.14208048120424616,
"calibration/mean_confidence": 0.5988117062957538,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 768.4,
"completions/max_terminated_length": 564.4,
"completions/mean_length": 213.418359375,
"completions/mean_terminated_length": 213.2895263671875,
"completions/min_length": 100.2,
"completions/min_terminated_length": 100.2,
"epoch": 0.816,
"grad_norm": 0.0023187189362943172,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 862790718.0,
"reward": 1.053348708152771,
"reward_std": 0.06448897942900658,
"rewards/accuracy_reward": 0.64423828125,
"rewards/brier_reward": 0.8300428271293641,
"rewards/confidence_uniqueness_reward": 0.9373907327651978,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002119234437122941,
"rewards/frontier_coverage_1": 0.09330451190471649,
"rewards/frontier_coverage_10": 0.09116496592760086,
"rewards/frontier_coverage_15": 0.0719268336892128,
"rewards/frontier_coverage_20": 0.07347770035266876,
"rewards/frontier_coverage_25": 0.13299526423215866,
"rewards/frontier_coverage_5": 0.09330451190471649,
"rewards/frontier_ece_reward": 0.0034532100893557073,
"signal/accuracy_reward/centered_abs_mean": 0.082476806640625,
"signal/accuracy_reward/group_std_mean": 0.10868191868066787,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0412384033203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0412384033203125,
"signal/advantage_abs_mean": 0.048784293979406354,
"signal/advantage_pre_scale_abs_mean": 0.048784293979406354,
"signal/advantage_pre_scale_std": 0.09951501935720444,
"signal/advantage_std": 0.09951501935720444,
"signal/brier_reward/centered_abs_mean": 0.10197662115097046,
"signal/brier_reward/group_std_mean": 0.13159122467041015,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012747077643871308,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012747077643871308,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02778756096959114,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03503857851028443,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034734451211988924,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034734451211988924,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020503590581938624,
"signal/frontier_aurc_reward/group_std_mean": 0.003305292781442404,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6701426142826674e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6701426142826674e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12355931252241134,
"signal/frontier_coverage_1/group_std_mean": 0.1595274031162262,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022117116721346976,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022117116721346976,
"signal/frontier_coverage_10/centered_abs_mean": 0.11546845138072967,
"signal/frontier_coverage_10/group_std_mean": 0.1491788625717163,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020668851910158994,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020668851910158994,
"signal/frontier_coverage_15/centered_abs_mean": 0.07564910650253295,
"signal/frontier_coverage_15/group_std_mean": 0.09829453229904175,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013541190419346094,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013541190419346094,
"signal/frontier_coverage_20/centered_abs_mean": 0.05537274181842804,
"signal/frontier_coverage_20/group_std_mean": 0.0709018051624298,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000991172017529607,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000991172017529607,
"signal/frontier_coverage_25/centered_abs_mean": 0.06835410594940186,
"signal/frontier_coverage_25/group_std_mean": 0.08684322088956833,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012235384434461593,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012235384434461593,
"signal/frontier_coverage_5/centered_abs_mean": 0.12355931252241134,
"signal/frontier_coverage_5/group_std_mean": 0.1595274031162262,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022117116721346976,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022117116721346976,
"signal/frontier_ece_reward/centered_abs_mean": 0.0032832324504852295,
"signal/frontier_ece_reward/group_std_mean": 0.0042684660758823155,
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004104040563106537,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004104040563106537,
"step": 255
},
{
"calibration/aurc": 0.29902088331269355,
"calibration/batch_distribution_entropy": 0.8771544045312076,
"calibration/buffer_distribution_entropy": 0.9328252547851499,
"calibration/confidence_entropy": 0.3984631748431898,
"calibration/coverage@0%": 0.18125,
"calibration/coverage@1%": 0.18359375,
"calibration/coverage@10%": 0.2484375,
"calibration/coverage@15%": 0.26953125,
"calibration/coverage@20%": 0.37109375,
"calibration/coverage@25%": 0.5015625,
"calibration/coverage@30%": 0.58515625,
"calibration/coverage@5%": 0.21953125,
"calibration/ece": 0.14786460040812144,
"calibration/mean_confidence": 0.5620528753555174,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 467.8,
"completions/max_terminated_length": 467.8,
"completions/mean_length": 210.1916015625,
"completions/mean_terminated_length": 210.1916015625,
"completions/min_length": 95.4,
"completions/min_terminated_length": 95.4,
"epoch": 0.832,
"grad_norm": 0.001390106393955648,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 879951432.0,
"reward": 1.0441203117370605,
"reward_std": 0.05937432199716568,
"rewards/accuracy_reward": 0.61279296875,
"rewards/brier_reward": 0.85388263463974,
"rewards/confidence_uniqueness_reward": 0.9362106323242188,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0018755205674096942,
"rewards/frontier_coverage_1": 0.1422416090965271,
"rewards/frontier_coverage_10": 0.13426189720630646,
"rewards/frontier_coverage_15": 0.09782664477825165,
"rewards/frontier_coverage_20": 0.09015188366174698,
"rewards/frontier_coverage_25": 0.1458705931901932,
"rewards/frontier_coverage_5": 0.1422416090965271,
"rewards/frontier_ece_reward": 0.004194558784365654,
"signal/accuracy_reward/centered_abs_mean": 0.071063232421875,
"signal/accuracy_reward/group_std_mean": 0.09939071238040924,
"signal/accuracy_reward/group_zero_std_frac": 0.7,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0355316162109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0355316162109375,
"signal/advantage_abs_mean": 0.043323104828596117,
"signal/advantage_pre_scale_abs_mean": 0.043323104828596117,
"signal/advantage_pre_scale_std": 0.09220470041036606,
"signal/advantage_std": 0.09220470041036606,
"signal/brier_reward/centered_abs_mean": 0.08896346092224121,
"signal/brier_reward/group_std_mean": 0.11624416410923004,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011120432615280151,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011120432615280151,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.027381277084350585,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03401793241500854,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003422659635543823,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003422659635543823,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0018187327776104211,
"signal/frontier_aurc_reward/group_std_mean": 0.0029950566589832307,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.255531628383323e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.255531628383323e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11554279178380966,
"signal/frontier_coverage_1/group_std_mean": 0.1498140126466751,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002068215887993574,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002068215887993574,
"signal/frontier_coverage_10/centered_abs_mean": 0.10581835210323334,
"signal/frontier_coverage_10/group_std_mean": 0.13711453676223756,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018941484624519945,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018941484624519945,
"signal/frontier_coverage_15/centered_abs_mean": 0.07099459692835808,
"signal/frontier_coverage_15/group_std_mean": 0.09155822247266769,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012708032154478133,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012708032154478133,
"signal/frontier_coverage_20/centered_abs_mean": 0.05304303243756294,
"signal/frontier_coverage_20/group_std_mean": 0.066974838078022,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009494703030213713,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009494703030213713,
"signal/frontier_coverage_25/centered_abs_mean": 0.06249256357550621,
"signal/frontier_coverage_25/group_std_mean": 0.08098939657211304,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011186168296262622,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011186168296262622,
"signal/frontier_coverage_5/centered_abs_mean": 0.11554279178380966,
"signal/frontier_coverage_5/group_std_mean": 0.1498140126466751,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002068215887993574,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002068215887993574,
"signal/frontier_ece_reward/centered_abs_mean": 0.003021185612305999,
"signal/frontier_ece_reward/group_std_mean": 0.003916465956717729,
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00037764820153824986,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00037764820153824986,
"step": 260
},
{
"calibration/aurc": 0.16050257914279836,
"calibration/batch_distribution_entropy": 0.8431910556560627,
"calibration/buffer_distribution_entropy": 0.931636889634975,
"calibration/confidence_entropy": 0.3986462888374449,
"calibration/coverage@0%": 0.12734375,
"calibration/coverage@1%": 0.1640625,
"calibration/coverage@10%": 0.41015625,
"calibration/coverage@15%": 0.50234375,
"calibration/coverage@20%": 0.76328125,
"calibration/coverage@25%": 0.8515625,
"calibration/coverage@30%": 0.93046875,
"calibration/coverage@5%": 0.26171875,
"calibration/ece": 0.14965263001674728,
"calibration/mean_confidence": 0.6677016463180183,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 720.6,
"completions/max_terminated_length": 503.2,
"completions/mean_length": 205.6150390625,
"completions/mean_terminated_length": 205.48502502441406,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.848,
"grad_norm": 0.0016696910606697202,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 897071298.0,
"reward": 1.0344059467315674,
"reward_std": 0.05915949493646622,
"rewards/accuracy_reward": 0.5978515625,
"rewards/brier_reward": 0.8411754608154297,
"rewards/confidence_uniqueness_reward": 0.9387550115585327,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0017502504400908948,
"rewards/frontier_coverage_1": 0.13980764299631118,
"rewards/frontier_coverage_10": 0.12554386407136917,
"rewards/frontier_coverage_15": 0.09122110307216644,
"rewards/frontier_coverage_20": 0.0802506908774376,
"rewards/frontier_coverage_25": 0.12608129382133484,
"rewards/frontier_coverage_5": 0.13980764299631118,
"rewards/frontier_ece_reward": 0.00392393465153873,
"signal/accuracy_reward/centered_abs_mean": 0.0715087890625,
"signal/accuracy_reward/group_std_mean": 0.09724359661340713,
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03575439453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03575439453125,
"signal/advantage_abs_mean": 0.044149909913539884,
"signal/advantage_pre_scale_abs_mean": 0.044149909913539884,
"signal/advantage_pre_scale_std": 0.0919294998049736,
"signal/advantage_std": 0.0919294998049736,
"signal/brier_reward/centered_abs_mean": 0.0906279519200325,
"signal/brier_reward/group_std_mean": 0.12001040577888489,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011328493990004063,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011328493990004063,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.026908674091100693,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0335762545466423,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033635842613875867,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033635842613875867,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001596922567114234,
"signal/frontier_aurc_reward/group_std_mean": 0.0026214892510324716,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.858491352526471e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.858491352526471e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12007757127285004,
"signal/frontier_coverage_1/group_std_mean": 0.16089081168174743,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002149388426914811,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002149388426914811,
"signal/frontier_coverage_10/centered_abs_mean": 0.1089574933052063,
"signal/frontier_coverage_10/group_std_mean": 0.14638633131980897,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019503391114994884,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019503391114994884,
"signal/frontier_coverage_15/centered_abs_mean": 0.07160564810037613,
"signal/frontier_coverage_15/group_std_mean": 0.0962700754404068,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012817410985007881,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012817410985007881,
"signal/frontier_coverage_20/centered_abs_mean": 0.0523877888917923,
"signal/frontier_coverage_20/group_std_mean": 0.06873219013214112,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009377413894981146,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009377413894981146,
"signal/frontier_coverage_25/centered_abs_mean": 0.06207484975457191,
"signal/frontier_coverage_25/group_std_mean": 0.08022152930498123,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011111397529020906,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011111397529020906,
"signal/frontier_coverage_5/centered_abs_mean": 0.12007757127285004,
"signal/frontier_coverage_5/group_std_mean": 0.16089081168174743,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002149388426914811,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002149388426914811,
"signal/frontier_ece_reward/centered_abs_mean": 0.0029800481628626586,
"signal/frontier_ece_reward/group_std_mean": 0.003989115683361888,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003725060203578323,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003725060203578323,
"step": 265
},
{
"calibration/aurc": 0.1268785574113444,
"calibration/batch_distribution_entropy": 0.8655835602170416,
"calibration/buffer_distribution_entropy": 0.930216643410773,
"calibration/confidence_entropy": 0.38734716180223816,
"calibration/coverage@0%": 0.37265625,
"calibration/coverage@1%": 0.5234375,
"calibration/coverage@10%": 0.6515625,
"calibration/coverage@15%": 0.684375,
"calibration/coverage@20%": 0.7078125,
"calibration/coverage@25%": 0.7296875,
"calibration/coverage@30%": 0.7578125,
"calibration/coverage@5%": 0.60625,
"calibration/ece": 0.18136702633101748,
"calibration/mean_confidence": 0.6521458386893639,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 934.0,
"completions/max_terminated_length": 527.6,
"completions/mean_length": 207.2068359375,
"completions/mean_terminated_length": 206.9475830078125,
"completions/min_length": 102.4,
"completions/min_terminated_length": 102.4,
"epoch": 0.864,
"grad_norm": 0.002203070791438222,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 914179912.0,
"reward": 1.0581453800201417,
"reward_std": 0.063059052079916,
"rewards/accuracy_reward": 0.65205078125,
"rewards/brier_reward": 0.836116099357605,
"rewards/confidence_uniqueness_reward": 0.9410740494728088,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0015314666437916459,
"rewards/frontier_coverage_1": 0.09230080395936965,
"rewards/frontier_coverage_10": 0.08503075465559959,
"rewards/frontier_coverage_15": 0.06719348207116127,
"rewards/frontier_coverage_20": 0.07197408005595207,
"rewards/frontier_coverage_25": 0.1352065086364746,
"rewards/frontier_coverage_5": 0.09230080395936965,
"rewards/frontier_ece_reward": 0.002868586964905262,
"signal/accuracy_reward/centered_abs_mean": 0.080035400390625,
"signal/accuracy_reward/group_std_mean": 0.10791658908128739,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400177001953125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0400177001953125,
"signal/advantage_abs_mean": 0.04706686735153198,
"signal/advantage_pre_scale_abs_mean": 0.04706686735153198,
"signal/advantage_pre_scale_std": 0.09770552664995194,
"signal/advantage_std": 0.09770552664995194,
"signal/brier_reward/centered_abs_mean": 0.09740178287029266,
"signal/brier_reward/group_std_mean": 0.12451921701431275,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012175222858786583,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012175222858786583,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025182069465517997,
"signal/confidence_uniqueness_reward/group_std_mean": 0.031810386851429936,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031477586831897496,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031477586831897496,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001567194890230894,
"signal/frontier_aurc_reward/group_std_mean": 0.0025726008461788297,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8052787092747168e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8052787092747168e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12118019163608551,
"signal/frontier_coverage_1/group_std_mean": 0.15813361406326293,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002169125364162028,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002169125364162028,
"signal/frontier_coverage_10/centered_abs_mean": 0.10587679147720337,
"signal/frontier_coverage_10/group_std_mean": 0.13836515247821807,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018951945239678025,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018951945239678025,
"signal/frontier_coverage_15/centered_abs_mean": 0.07084731981158257,
"signal/frontier_coverage_15/group_std_mean": 0.0918091282248497,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012681669555604457,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012681669555604457,
"signal/frontier_coverage_20/centered_abs_mean": 0.05579846650362015,
"signal/frontier_coverage_20/group_std_mean": 0.07049720138311386,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009987925528548657,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009987925528548657,
"signal/frontier_coverage_25/centered_abs_mean": 0.06971824020147324,
"signal/frontier_coverage_25/group_std_mean": 0.08754518479108811,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012479565106332303,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012479565106332303,
"signal/frontier_coverage_5/centered_abs_mean": 0.12118019163608551,
"signal/frontier_coverage_5/group_std_mean": 0.15813361406326293,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002169125364162028,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002169125364162028,
"signal/frontier_ece_reward/centered_abs_mean": 0.0029707029927521942,
"signal/frontier_ece_reward/group_std_mean": 0.0038403474260121583,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003713378740940243,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003713378740940243,
"step": 270
},
{
"calibration/aurc": 0.273963740303666,
"calibration/batch_distribution_entropy": 0.8652895848583995,
"calibration/buffer_distribution_entropy": 0.9298649992702075,
"calibration/confidence_entropy": 0.37132087278349013,
"calibration/coverage@0%": 0.10390625,
"calibration/coverage@1%": 0.125,
"calibration/coverage@10%": 0.25390625,
"calibration/coverage@15%": 0.2765625,
"calibration/coverage@20%": 0.38828125,
"calibration/coverage@25%": 0.4703125,
"calibration/coverage@30%": 0.5703125,
"calibration/coverage@5%": 0.18046875,
"calibration/ece": 0.16565321925298856,
"calibration/mean_confidence": 0.5717535064279697,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 677.8,
"completions/max_terminated_length": 465.0,
"completions/mean_length": 201.7654296875,
"completions/mean_terminated_length": 201.63529052734376,
"completions/min_length": 101.4,
"completions/min_terminated_length": 101.4,
"epoch": 0.88,
"grad_norm": 0.001780420308932662,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 931393062.0,
"reward": 1.0194225072860719,
"reward_std": 0.0662582591176033,
"rewards/accuracy_reward": 0.57275390625,
"rewards/brier_reward": 0.8221846461296082,
"rewards/confidence_uniqueness_reward": 0.9409846425056457,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002486996748484671,
"rewards/frontier_coverage_1": 0.14020877778530122,
"rewards/frontier_coverage_10": 0.12404286712408066,
"rewards/frontier_coverage_15": 0.08958611041307449,
"rewards/frontier_coverage_20": 0.07855436801910401,
"rewards/frontier_coverage_25": 0.11499525308609009,
"rewards/frontier_coverage_5": 0.14020877778530122,
"rewards/frontier_ece_reward": 0.00347807789221406,
"signal/accuracy_reward/centered_abs_mean": 0.084307861328125,
"signal/accuracy_reward/group_std_mean": 0.11367884427309036,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421539306640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0421539306640625,
"signal/advantage_abs_mean": 0.05013991966843605,
"signal/advantage_pre_scale_abs_mean": 0.05013991966843605,
"signal/advantage_pre_scale_std": 0.10045773237943649,
"signal/advantage_std": 0.10045773237943649,
"signal/brier_reward/centered_abs_mean": 0.10293448865413665,
"signal/brier_reward/group_std_mean": 0.13437058925628662,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012866811081767082,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012866811081767082,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024662094563245772,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03065968081355095,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030827618204057215,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030827618204057215,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002529387711547315,
"signal/frontier_aurc_reward/group_std_mean": 0.004110026638954878,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5276039600139484e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5276039600139484e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13029766380786895,
"signal/frontier_coverage_1/group_std_mean": 0.17371802926063537,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002332328073680401,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002332328073680401,
"signal/frontier_coverage_10/centered_abs_mean": 0.11453571021556855,
"signal/frontier_coverage_10/group_std_mean": 0.15250465869903565,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020501891616731883,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020501891616731883,
"signal/frontier_coverage_15/centered_abs_mean": 0.07582000344991684,
"signal/frontier_coverage_15/group_std_mean": 0.10073214769363403,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001357178040780127,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001357178040780127,
"signal/frontier_coverage_20/centered_abs_mean": 0.0566535584628582,
"signal/frontier_coverage_20/group_std_mean": 0.0733156070113182,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010140986763872207,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010140986763872207,
"signal/frontier_coverage_25/centered_abs_mean": 0.06789239197969436,
"signal/frontier_coverage_25/group_std_mean": 0.0869957149028778,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012152737472206354,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012152737472206354,
"signal/frontier_coverage_5/centered_abs_mean": 0.13029766380786895,
"signal/frontier_coverage_5/group_std_mean": 0.17371802926063537,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002332328073680401,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002332328073680401,
"signal/frontier_ece_reward/centered_abs_mean": 0.003146560303866863,
"signal/frontier_ece_reward/group_std_mean": 0.004122556420043111,
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003933200379833579,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003933200379833579,
"step": 275
},
{
"calibration/aurc": 0.25215917817714806,
"calibration/batch_distribution_entropy": 0.8739498979185845,
"calibration/buffer_distribution_entropy": 0.930060328102031,
"calibration/confidence_entropy": 0.39616207217016336,
"calibration/coverage@0%": 0.103125,
"calibration/coverage@1%": 0.13125,
"calibration/coverage@10%": 0.45390625,
"calibration/coverage@15%": 0.490625,
"calibration/coverage@20%": 0.5234375,
"calibration/coverage@25%": 0.546875,
"calibration/coverage@30%": 0.56640625,
"calibration/coverage@5%": 0.22109375,
"calibration/ece": 0.1740826497875611,
"calibration/mean_confidence": 0.5997064484977905,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 661.8,
"completions/max_terminated_length": 443.6,
"completions/mean_length": 200.58515625,
"completions/mean_terminated_length": 200.45480651855468,
"completions/min_length": 96.6,
"completions/min_terminated_length": 96.6,
"epoch": 0.896,
"grad_norm": 0.001766073633916676,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 948557902.0,
"reward": 1.0397424459457398,
"reward_std": 0.06280734091997146,
"rewards/accuracy_reward": 0.6123046875,
"rewards/brier_reward": 0.8335294604301453,
"rewards/confidence_uniqueness_reward": 0.94145667552948,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002396345266606659,
"rewards/frontier_coverage_1": 0.12290604412555695,
"rewards/frontier_coverage_10": 0.10812564045190812,
"rewards/frontier_coverage_15": 0.08217538744211197,
"rewards/frontier_coverage_20": 0.07664992213249207,
"rewards/frontier_coverage_25": 0.12757501602172852,
"rewards/frontier_coverage_5": 0.12290604412555695,
"rewards/frontier_ece_reward": 0.0031625948380678893,
"signal/accuracy_reward/centered_abs_mean": 0.07562255859375,
"signal/accuracy_reward/group_std_mean": 0.10868183225393295,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037811279296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037811279296875,
"signal/advantage_abs_mean": 0.04412608295679092,
"signal/advantage_pre_scale_abs_mean": 0.04412608295679092,
"signal/advantage_pre_scale_std": 0.09516832679510116,
"signal/advantage_std": 0.09516832679510116,
"signal/brier_reward/centered_abs_mean": 0.09220918267965317,
"signal/brier_reward/group_std_mean": 0.12125321626663207,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011526147834956646,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011526147834956646,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023380208760499954,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02966206856071949,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029225260950624943,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029225260950624943,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021038307808339597,
"signal/frontier_aurc_reward/group_std_mean": 0.0033327710116282105,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7658572182408534e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7658572182408534e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.11688004732131958,
"signal/frontier_coverage_1/group_std_mean": 0.15271745324134828,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00209215278737247,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00209215278737247,
"signal/frontier_coverage_10/centered_abs_mean": 0.10004872977733612,
"signal/frontier_coverage_10/group_std_mean": 0.13066715896129608,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001790872262790799,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001790872262790799,
"signal/frontier_coverage_15/centered_abs_mean": 0.06665360033512116,
"signal/frontier_coverage_15/group_std_mean": 0.08639876991510391,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011930993758141994,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011930993758141994,
"signal/frontier_coverage_20/centered_abs_mean": 0.05205147713422775,
"signal/frontier_coverage_20/group_std_mean": 0.06605355590581893,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009317214018665255,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009317214018665255,
"signal/frontier_coverage_25/centered_abs_mean": 0.06579188704490661,
"signal/frontier_coverage_25/group_std_mean": 0.08515497148036957,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011776747182011605,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011776747182011605,
"signal/frontier_coverage_5/centered_abs_mean": 0.11688004732131958,
"signal/frontier_coverage_5/group_std_mean": 0.15271745324134828,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00209215278737247,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00209215278737247,
"signal/frontier_ece_reward/centered_abs_mean": 0.0026048448868095874,
"signal/frontier_ece_reward/group_std_mean": 0.0034122115466743708,
"signal/frontier_ece_reward/group_zero_std_frac": 0.040625,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003256056108511984,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003256056108511984,
"step": 280
},
{
"calibration/aurc": 0.2771183806359747,
"calibration/batch_distribution_entropy": 0.8578019537959676,
"calibration/buffer_distribution_entropy": 0.9298157417811967,
"calibration/confidence_entropy": 0.38584274130978946,
"calibration/coverage@0%": 0.19453125,
"calibration/coverage@1%": 0.20078125,
"calibration/coverage@10%": 0.31953125,
"calibration/coverage@15%": 0.41953125,
"calibration/coverage@20%": 0.4734375,
"calibration/coverage@25%": 0.59375,
"calibration/coverage@30%": 0.62734375,
"calibration/coverage@5%": 0.25234375,
"calibration/ece": 0.1462862952874513,
"calibration/mean_confidence": 0.5480367561507691,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 504.0,
"completions/max_terminated_length": 504.0,
"completions/mean_length": 203.267578125,
"completions/mean_terminated_length": 203.267578125,
"completions/min_length": 97.6,
"completions/min_terminated_length": 97.6,
"epoch": 0.912,
"grad_norm": 0.0015597037272527814,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 965690658.0,
"reward": 1.02649986743927,
"reward_std": 0.06281092613935471,
"rewards/accuracy_reward": 0.58515625,
"rewards/brier_reward": 0.8292442321777344,
"rewards/confidence_uniqueness_reward": 0.946649169921875,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0018648097291588783,
"rewards/frontier_coverage_1": 0.13054397702217102,
"rewards/frontier_coverage_10": 0.11399659514427185,
"rewards/frontier_coverage_15": 0.08423100709915161,
"rewards/frontier_coverage_20": 0.07361575737595558,
"rewards/frontier_coverage_25": 0.1144769087433815,
"rewards/frontier_coverage_5": 0.13054397702217102,
"rewards/frontier_ece_reward": 0.0030390231404453516,
"signal/accuracy_reward/centered_abs_mean": 0.079638671875,
"signal/accuracy_reward/group_std_mean": 0.10629072934389114,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0398193359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0398193359375,
"signal/advantage_abs_mean": 0.04734830111265183,
"signal/advantage_pre_scale_abs_mean": 0.04734830111265183,
"signal/advantage_pre_scale_std": 0.09430029839277268,
"signal/advantage_std": 0.09430029839277268,
"signal/brier_reward/centered_abs_mean": 0.09894435703754426,
"signal/brier_reward/group_std_mean": 0.12911319881677627,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012368044629693032,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012368044629693032,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.022022104263305663,
"signal/confidence_uniqueness_reward/group_std_mean": 0.027249596640467645,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002752763032913208,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002752763032913208,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0014280044939368962,
"signal/frontier_aurc_reward/group_std_mean": 0.0022502636536955835,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5561279471730813e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5561279471730813e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1327954038977623,
"signal/frontier_coverage_1/group_std_mean": 0.17255037724971772,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002377037703990936,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002377037703990936,
"signal/frontier_coverage_10/centered_abs_mean": 0.11172600984573364,
"signal/frontier_coverage_10/group_std_mean": 0.14537906944751738,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019998955307528377,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019998955307528377,
"signal/frontier_coverage_15/centered_abs_mean": 0.07505722343921661,
"signal/frontier_coverage_15/group_std_mean": 0.09774749577045441,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013435242231935262,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013435242231935262,
"signal/frontier_coverage_20/centered_abs_mean": 0.0550868459045887,
"signal/frontier_coverage_20/group_std_mean": 0.07034202218055725,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000986054469831288,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000986054469831288,
"signal/frontier_coverage_25/centered_abs_mean": 0.06590208411216736,
"signal/frontier_coverage_25/group_std_mean": 0.08453426957130432,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011796473059803247,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011796473059803247,
"signal/frontier_coverage_5/centered_abs_mean": 0.1327954038977623,
"signal/frontier_coverage_5/group_std_mean": 0.17255037724971772,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002377037703990936,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002377037703990936,
"signal/frontier_ece_reward/centered_abs_mean": 0.002762398170307279,
"signal/frontier_ece_reward/group_std_mean": 0.003595150355249643,
"signal/frontier_ece_reward/group_zero_std_frac": 0.021875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00034529977128840985,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00034529977128840985,
"step": 285
},
{
"calibration/aurc": 0.18523557038518218,
"calibration/batch_distribution_entropy": 0.9283445164377936,
"calibration/buffer_distribution_entropy": 0.9310390835813201,
"calibration/confidence_entropy": 0.44306056288304363,
"calibration/coverage@0%": 0.08046875,
"calibration/coverage@1%": 0.08046875,
"calibration/coverage@10%": 0.40859375,
"calibration/coverage@15%": 0.51640625,
"calibration/coverage@20%": 0.59140625,
"calibration/coverage@25%": 0.66015625,
"calibration/coverage@30%": 0.7515625,
"calibration/coverage@5%": 0.21484375,
"calibration/ece": 0.15614833606636924,
"calibration/mean_confidence": 0.5529057898582223,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 464.0,
"completions/max_terminated_length": 464.0,
"completions/mean_length": 201.409375,
"completions/mean_terminated_length": 201.409375,
"completions/min_length": 93.6,
"completions/min_terminated_length": 93.6,
"epoch": 0.928,
"grad_norm": 0.0014505106955766678,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 982779906.0,
"reward": 1.0353971242904663,
"reward_std": 0.06244761645793915,
"rewards/accuracy_reward": 0.6052734375,
"rewards/brier_reward": 0.8278081059455872,
"rewards/confidence_uniqueness_reward": 0.9446945190429688,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0016608674312010407,
"rewards/frontier_coverage_1": 0.11747038513422012,
"rewards/frontier_coverage_10": 0.10292258858680725,
"rewards/frontier_coverage_15": 0.07811000794172288,
"rewards/frontier_coverage_20": 0.07272942364215851,
"rewards/frontier_coverage_25": 0.12077962756156921,
"rewards/frontier_coverage_5": 0.11747038513422012,
"rewards/frontier_ece_reward": 0.0025405031628906727,
"signal/accuracy_reward/centered_abs_mean": 0.0777587890625,
"signal/accuracy_reward/group_std_mean": 0.10788596123456955,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03887939453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03887939453125,
"signal/advantage_abs_mean": 0.0458635076880455,
"signal/advantage_pre_scale_abs_mean": 0.0458635076880455,
"signal/advantage_pre_scale_std": 0.09391386359930039,
"signal/advantage_std": 0.09391386359930039,
"signal/brier_reward/centered_abs_mean": 0.09663857668638229,
"signal/brier_reward/group_std_mean": 0.12685683369636536,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012079822085797786,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012079822085797786,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02300581932067871,
"signal/confidence_uniqueness_reward/group_std_mean": 0.028626967594027518,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028757274150848387,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028757274150848387,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015263804234564304,
"signal/frontier_aurc_reward/group_std_mean": 0.0025658855913206933,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7322209280100652e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7322209280100652e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13058804869651794,
"signal/frontier_coverage_1/group_std_mean": 0.17300075590610503,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002337525924667716,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002337525924667716,
"signal/frontier_coverage_10/centered_abs_mean": 0.10771108269691468,
"signal/frontier_coverage_10/group_std_mean": 0.14299528300762177,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019280282547697424,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019280282547697424,
"signal/frontier_coverage_15/centered_abs_mean": 0.0730916753411293,
"signal/frontier_coverage_15/group_std_mean": 0.09659909605979919,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013083409518003463,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013083409518003463,
"signal/frontier_coverage_20/centered_abs_mean": 0.05504903867840767,
"signal/frontier_coverage_20/group_std_mean": 0.07065875232219695,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009853777824901044,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009853777824901044,
"signal/frontier_coverage_25/centered_abs_mean": 0.06587158292531967,
"signal/frontier_coverage_25/group_std_mean": 0.08444809466600418,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001179101294837892,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001179101294837892,
"signal/frontier_coverage_5/centered_abs_mean": 0.13058804869651794,
"signal/frontier_coverage_5/group_std_mean": 0.17300075590610503,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002337525924667716,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002337525924667716,
"signal/frontier_ece_reward/centered_abs_mean": 0.002657411713153124,
"signal/frontier_ece_reward/group_std_mean": 0.003528282977640629,
"signal/frontier_ece_reward/group_zero_std_frac": 0.025,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003321764641441405,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003321764641441405,
"step": 290
},
{
"calibration/aurc": 0.227605377702519,
"calibration/batch_distribution_entropy": 0.8678024925217093,
"calibration/buffer_distribution_entropy": 0.9332008553883091,
"calibration/confidence_entropy": 0.39428912795582655,
"calibration/coverage@0%": 0.16796875,
"calibration/coverage@1%": 0.17578125,
"calibration/coverage@10%": 0.3359375,
"calibration/coverage@15%": 0.415625,
"calibration/coverage@20%": 0.48515625,
"calibration/coverage@25%": 0.56015625,
"calibration/coverage@30%": 0.61484375,
"calibration/coverage@5%": 0.2453125,
"calibration/ece": 0.09917310585392751,
"calibration/mean_confidence": 0.4722144451024394,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 920.0,
"completions/max_terminated_length": 489.2,
"completions/mean_length": 202.98466796875,
"completions/mean_terminated_length": 202.72425537109376,
"completions/min_length": 100.2,
"completions/min_terminated_length": 100.2,
"epoch": 0.944,
"grad_norm": 0.0016979072242975235,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 999833893.0,
"reward": 1.0405084133148192,
"reward_std": 0.07115750387310982,
"rewards/accuracy_reward": 0.616015625,
"rewards/brier_reward": 0.8274973273277283,
"rewards/confidence_uniqueness_reward": 0.9424091815948487,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.001480784686282277,
"rewards/frontier_coverage_1": 0.11875949800014496,
"rewards/frontier_coverage_10": 0.10412099286913871,
"rewards/frontier_coverage_15": 0.08062837272882462,
"rewards/frontier_coverage_20": 0.0747826412320137,
"rewards/frontier_coverage_25": 0.12094295620918274,
"rewards/frontier_coverage_5": 0.11873992830514908,
"rewards/frontier_ece_reward": 0.0025975925382226706,
"signal/accuracy_reward/centered_abs_mean": 0.10352783203125,
"signal/accuracy_reward/group_std_mean": 0.1342850521206856,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051763916015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051763916015625,
"signal/advantage_abs_mean": 0.054445850849151614,
"signal/advantage_pre_scale_abs_mean": 0.054445850849151614,
"signal/advantage_pre_scale_std": 0.10500096529722214,
"signal/advantage_std": 0.10500096529722214,
"signal/brier_reward/centered_abs_mean": 0.1040783628821373,
"signal/brier_reward/group_std_mean": 0.13488138020038604,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013009795360267163,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013009795360267163,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02404037192463875,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03030591309070587,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030050464905798436,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030050464905798436,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001270879921503365,
"signal/frontier_aurc_reward/group_std_mean": 0.0021014282014220954,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2748749870515894e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2748749870515894e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14869227409362792,
"signal/frontier_coverage_1/group_std_mean": 0.19285742044448853,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026615916285663843,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026615916285663843,
"signal/frontier_coverage_10/centered_abs_mean": 0.11862881183624267,
"signal/frontier_coverage_10/group_std_mean": 0.15488066375255585,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021234555868431928,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021234555868431928,
"signal/frontier_coverage_15/centered_abs_mean": 0.07927502691745758,
"signal/frontier_coverage_15/group_std_mean": 0.10365805774927139,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014190229121595621,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014190229121595621,
"signal/frontier_coverage_20/centered_abs_mean": 0.05630268827080727,
"signal/frontier_coverage_20/group_std_mean": 0.0722155287861824,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010078180697746576,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010078180697746576,
"signal/frontier_coverage_25/centered_abs_mean": 0.06522702798247337,
"signal/frontier_coverage_25/group_std_mean": 0.08425245583057403,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00116756372153759,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00116756372153759,
"signal/frontier_coverage_5/centered_abs_mean": 0.14861850142478944,
"signal/frontier_coverage_5/group_std_mean": 0.19274679124355315,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002660271106287837,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002660271106287837,
"signal/frontier_ece_reward/centered_abs_mean": 0.002932385681197047,
"signal/frontier_ece_reward/group_std_mean": 0.0038601367734372614,
"signal/frontier_ece_reward/group_zero_std_frac": 0.01875,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003665482101496309,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003665482101496309,
"step": 295
},
{
"calibration/aurc": 0.22753061737104918,
"calibration/batch_distribution_entropy": 0.8395263422525059,
"calibration/buffer_distribution_entropy": 0.9327987097688348,
"calibration/confidence_entropy": 0.3778946036635543,
"calibration/coverage@0%": 0.21328125,
"calibration/coverage@1%": 0.215625,
"calibration/coverage@10%": 0.4578125,
"calibration/coverage@15%": 0.50625,
"calibration/coverage@20%": 0.57734375,
"calibration/coverage@25%": 0.63671875,
"calibration/coverage@30%": 0.690625,
"calibration/coverage@5%": 0.4203125,
"calibration/ece": 0.21041953124999999,
"calibration/mean_confidence": 0.63975046875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 683.0,
"completions/max_terminated_length": 455.4,
"completions/mean_length": 202.90927734375,
"completions/mean_terminated_length": 202.77940368652344,
"completions/min_length": 99.6,
"completions/min_terminated_length": 99.6,
"epoch": 0.96,
"grad_norm": 0.0016687435563653708,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 1016852004.0,
"reward": 1.0291436433792114,
"reward_std": 0.0583199568092823,
"rewards/accuracy_reward": 0.58271484375,
"rewards/brier_reward": 0.8477208733558654,
"rewards/confidence_uniqueness_reward": 0.9432453274726867,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0021191579522565006,
"rewards/frontier_coverage_1": 0.1549065351486206,
"rewards/frontier_coverage_10": 0.12819213569164276,
"rewards/frontier_coverage_15": 0.09457356631755828,
"rewards/frontier_coverage_20": 0.08866416066884994,
"rewards/frontier_coverage_25": 0.14134843051433563,
"rewards/frontier_coverage_5": 0.15478427112102508,
"rewards/frontier_ece_reward": 0.0032231774181127547,
"signal/accuracy_reward/centered_abs_mean": 0.072796630859375,
"signal/accuracy_reward/group_std_mean": 0.10131096243858337,
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363983154296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0363983154296875,
"signal/advantage_abs_mean": 0.042303390055894854,
"signal/advantage_pre_scale_abs_mean": 0.042303390055894854,
"signal/advantage_pre_scale_std": 0.0915198415517807,
"signal/advantage_std": 0.0915198415517807,
"signal/brier_reward/centered_abs_mean": 0.08765042722225189,
"signal/brier_reward/group_std_mean": 0.11655679643154145,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010956303402781486,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.010956303402781486,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.024246321246027946,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030507474020123482,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030307901557534932,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030307901557534932,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0017094084527343512,
"signal/frontier_aurc_reward/group_std_mean": 0.0029599607922136785,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.059841037611477e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.059841037611477e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12009998559951782,
"signal/frontier_coverage_1/group_std_mean": 0.15766243636608124,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002149789733812213,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002149789733812213,
"signal/frontier_coverage_10/centered_abs_mean": 0.09564173370599746,
"signal/frontier_coverage_10/group_std_mean": 0.12548429369926453,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017119870288297534,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017119870288297534,
"signal/frontier_coverage_15/centered_abs_mean": 0.06683021634817124,
"signal/frontier_coverage_15/group_std_mean": 0.08725652545690536,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011962608667090535,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011962608667090535,
"signal/frontier_coverage_20/centered_abs_mean": 0.052955988049507144,
"signal/frontier_coverage_20/group_std_mean": 0.06789801940321923,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009479121654294431,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009479121654294431,
"signal/frontier_coverage_25/centered_abs_mean": 0.06407563537359237,
"signal/frontier_coverage_25/group_std_mean": 0.08428025245666504,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001146953902207315,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001146953902207315,
"signal/frontier_coverage_5/centered_abs_mean": 0.1199414610862732,
"signal/frontier_coverage_5/group_std_mean": 0.15745915472507477,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002146952087059617,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002146952087059617,
"signal/frontier_ece_reward/centered_abs_mean": 0.0024932647589594125,
"signal/frontier_ece_reward/group_std_mean": 0.003281328594312072,
"signal/frontier_ece_reward/group_zero_std_frac": 0.028125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00031165809486992656,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00031165809486992656,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.40288964253104903,
"eval_calibration/batch_distribution_entropy": 0.9055634924361762,
"eval_calibration/buffer_distribution_entropy": 0.9315158471490221,
"eval_calibration/confidence_entropy": 0.4507296505092381,
"eval_calibration/coverage@0%": 0.0625,
"eval_calibration/coverage@1%": 0.0625,
"eval_calibration/coverage@10%": 0.0625,
"eval_calibration/coverage@15%": 0.15625,
"eval_calibration/coverage@20%": 0.15625,
"eval_calibration/coverage@25%": 0.28125,
"eval_calibration/coverage@30%": 0.3125,
"eval_calibration/coverage@5%": 0.0625,
"eval_calibration/ece": 0.2239203125,
"eval_calibration/mean_confidence": 0.5807953125,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 390.0,
"eval_completions/max_terminated_length": 390.0,
"eval_completions/mean_length": 201.66341400146484,
"eval_completions/mean_terminated_length": 201.66341400146484,
"eval_completions/min_length": 103.0,
"eval_completions/min_terminated_length": 103.0,
"eval_loss": 0.0,
"eval_num_tokens": 1016852004.0,
"eval_reward": 0.9454332888126373,
"eval_reward_std": 0.25646254420280457,
"eval_rewards/accuracy_reward": 0.44140625,
"eval_rewards/brier_reward": 0.7841920852661133,
"eval_rewards/confidence_uniqueness_reward": 0.8974609375,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.005405109841376543,
"eval_rewards/frontier_coverage_1": 0.19863545149564743,
"eval_rewards/frontier_coverage_10": 0.15553244948387146,
"eval_rewards/frontier_coverage_15": 0.10400541499257088,
"eval_rewards/frontier_coverage_20": 0.06785453855991364,
"eval_rewards/frontier_coverage_25": 0.06973126530647278,
"eval_rewards/frontier_coverage_5": 0.19848963618278503,
"eval_rewards/frontier_ece_reward": 0.0032259345753118396,
"eval_runtime": 10.2094,
"eval_samples_per_second": 48.974,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4755859375,
"eval_signal/accuracy_reward/group_std_mean": 0.49512895941734314,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23779296875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23779296875,
"eval_signal/advantage_abs_mean": 0.24011892080307007,
"eval_signal/advantage_pre_scale_abs_mean": 0.24011892080307007,
"eval_signal/advantage_pre_scale_std": 0.2530638575553894,
"eval_signal/advantage_std": 0.2530638575553894,
"eval_signal/brier_reward/centered_abs_mean": 0.2406034916639328,
"eval_signal/brier_reward/group_std_mean": 0.29827988147735596,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0300754364579916,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0300754364579916,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0444183349609375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051535068079829216,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055522918701171875,
"eval_signal/confidence_uniqueness_reward/weight": 0.125,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055522918701171875,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007637398317456245,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.01684427261352539,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013670942280441523,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013670942280441523,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.31974154710769653,
"eval_signal/frontier_coverage_1/group_std_mean": 0.3967055380344391,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005723373498767614,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005723373498767614,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.2480403035879135,
"eval_signal/frontier_coverage_10/group_std_mean": 0.31127846240997314,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004439921351149678,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004439921351149678,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.15623818337917328,
"eval_signal/frontier_coverage_15/group_std_mean": 0.20206287503242493,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027966632042080164,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027966632042080164,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10376439616084099,
"eval_signal/frontier_coverage_20/group_std_mean": 0.13048581779003143,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018573826528154314,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018573826528154314,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.19033470749855042,
"eval_signal/frontier_coverage_25/group_std_mean": 0.24326416850090027,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034069910179823637,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034069910179823637,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.31926435232162476,
"eval_signal/frontier_coverage_5/group_std_mean": 0.3961242437362671,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0057148318737745285,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0057148318737745285,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.004807816818356514,
"eval_signal/frontier_ece_reward/group_std_mean": 0.00625448627397418,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006009771022945642,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006009771022945642,
"eval_steps_per_second": 0.196,
"step": 300
},
{
"epoch": 0.96,
"step": 300,
"train_probe_calibration/aurc": 0.11477367801974697,
"train_probe_calibration/batch_distribution_entropy": 0.8259865898626315,
"train_probe_calibration/buffer_distribution_entropy": 0.9317202950173966,
"train_probe_calibration/confidence_entropy": 0.39752484027856694,
"train_probe_calibration/coverage@0%": 0.140625,
"train_probe_calibration/coverage@1%": 0.140625,
"train_probe_calibration/coverage@10%": 0.734375,
"train_probe_calibration/coverage@15%": 0.8125,
"train_probe_calibration/coverage@20%": 0.875,
"train_probe_calibration/coverage@25%": 0.9375,
"train_probe_calibration/coverage@30%": 0.96875,
"train_probe_calibration/coverage@5%": 0.421875,
"train_probe_calibration/ece": 0.16384375,
"train_probe_calibration/mean_confidence": 0.65728125,
"train_probe_completions/clipped_ratio": 0.0,
"train_probe_completions/max_length": 349.0,
"train_probe_completions/max_terminated_length": 349.0,
"train_probe_completions/mean_length": 201.03668975830078,
"train_probe_completions/mean_terminated_length": 201.03668975830078,
"train_probe_completions/min_length": 111.5,
"train_probe_completions/min_terminated_length": 111.5,
"train_probe_loss": 0.0,
"train_probe_num_tokens": 1016852004.0,
"train_probe_reward": 1.0789863467216492,
"train_probe_reward_std": 0.22811973094940186,
"train_probe_rewards/accuracy_reward": 0.693359375,
"train_probe_rewards/brier_reward": 0.8687321543693542,
"train_probe_rewards/confidence_uniqueness_reward": 0.900390625,
"train_probe_rewards/format_reward": 1.0,
"train_probe_rewards/frontier_aurc_reward": -0.001137724844738841,
"train_probe_rewards/frontier_coverage_1": 0.09572022780776024,
"train_probe_rewards/frontier_coverage_10": 0.0828697718679905,
"train_probe_rewards/frontier_coverage_15": 0.07084467262029648,
"train_probe_rewards/frontier_coverage_20": 0.08797503262758255,
"train_probe_rewards/frontier_coverage_25": 0.17389176040887833,
"train_probe_rewards/frontier_coverage_5": 0.09541856124997139,
"train_probe_rewards/frontier_ece_reward": 0.0026104446733370423,
"train_probe_runtime": 9.6991,
"train_probe_samples_per_second": 51.551,
"train_probe_signal/accuracy_reward/centered_abs_mean": 0.4151611328125,
"train_probe_signal/accuracy_reward/group_std_mean": 0.46192415058612823,
"train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0,
"train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20758056640625,
"train_probe_signal/accuracy_reward/weight": 0.5,
"train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20758056640625,
"train_probe_signal/advantage_abs_mean": 0.20193417370319366,
"train_probe_signal/advantage_pre_scale_abs_mean": 0.20193417370319366,
"train_probe_signal/advantage_pre_scale_std": 0.2256685495376587,
"train_probe_signal/advantage_std": 0.2256685495376587,
"train_probe_signal/brier_reward/centered_abs_mean": 0.15471985936164856,
"train_probe_signal/brier_reward/group_std_mean": 0.21638543158769608,
"train_probe_signal/brier_reward/group_zero_std_frac": 0.0,
"train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01933998242020607,
"train_probe_signal/brier_reward/weight": 0.125,
"train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01933998242020607,
"train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.039031982421875,
"train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.04604136198759079,
"train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004878997802734375,
"train_probe_signal/confidence_uniqueness_reward/weight": 0.125,
"train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004878997802734375,
"train_probe_signal/format_reward/centered_abs_mean": 0.0,
"train_probe_signal/format_reward/group_std_mean": 0.0,
"train_probe_signal/format_reward/group_zero_std_frac": 1.0,
"train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"train_probe_signal/format_reward/weight": 0.5,
"train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0,
"train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0020724779460579157,
"train_probe_signal/frontier_aurc_reward/group_std_mean": 0.003993918187916279,
"train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.709735210577492e-05,
"train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.709735210577492e-05,
"train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2661040276288986,
"train_probe_signal/frontier_coverage_1/group_std_mean": 0.37349459528923035,
"train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004763261880725622,
"train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004763261880725622,
"train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.20318175852298737,
"train_probe_signal/frontier_coverage_10/group_std_mean": 0.28969065845012665,
"train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036369531881064177,
"train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036369531881064177,
"train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.12607631087303162,
"train_probe_signal/frontier_coverage_15/group_std_mean": 0.18381474167108536,
"train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022567659616470337,
"train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022567659616470337,
"train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.08980197459459305,
"train_probe_signal/frontier_coverage_20/group_std_mean": 0.11331581324338913,
"train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016074551967903972,
"train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016074551967903972,
"train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.17201132327318192,
"train_probe_signal/frontier_coverage_25/group_std_mean": 0.2077884078025818,
"train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030790024902671576,
"train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030790024902671576,
"train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.26485244929790497,
"train_probe_signal/frontier_coverage_5/group_std_mean": 0.3718564957380295,
"train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004740858683362603,
"train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856,
"train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004740858683362603,
"train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.004174819332547486,
"train_probe_signal/frontier_ece_reward/group_std_mean": 0.005938299465924501,
"train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005218524165684357,
"train_probe_signal/frontier_ece_reward/weight": 0.125,
"train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005218524165684357,
"train_probe_steps_per_second": 0.206
},
{
"calibration/aurc": 0.19888012009032868,
"calibration/batch_distribution_entropy": 0.8751961847929361,
"calibration/buffer_distribution_entropy": 0.9316692924834962,
"calibration/confidence_entropy": 0.4188681558773877,
"calibration/coverage@0%": 0.0359375,
"calibration/coverage@1%": 0.0359375,
"calibration/coverage@10%": 0.43515625,
"calibration/coverage@15%": 0.4859375,
"calibration/coverage@20%": 0.53671875,
"calibration/coverage@25%": 0.61484375,
"calibration/coverage@30%": 0.72421875,
"calibration/coverage@5%": 0.18515625,
"calibration/ece": 0.14540309140625002,
"calibration/mean_confidence": 0.64485612734375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 931.6,
"completions/max_terminated_length": 540.2,
"completions/mean_length": 205.41650390625,
"completions/mean_terminated_length": 205.0261260986328,
"completions/min_length": 101.4,
"completions/min_terminated_length": 101.4,
"epoch": 0.976,
"grad_norm": 0.0016158220823854208,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 1033816589.0,
"reward": 1.0453666210174561,
"reward_std": 0.06708120256662368,
"rewards/accuracy_reward": 0.62216796875,
"rewards/brier_reward": 0.8398854255676269,
"rewards/confidence_uniqueness_reward": 0.9433197736740112,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0018908762140199542,
"rewards/frontier_coverage_1": 0.11496728807687759,
"rewards/frontier_coverage_10": 0.09931781068444252,
"rewards/frontier_coverage_15": 0.07630908414721489,
"rewards/frontier_coverage_20": 0.07998319193720818,
"rewards/frontier_coverage_25": 0.1425451785326004,
"rewards/frontier_coverage_5": 0.11488909721374511,
"rewards/frontier_ece_reward": 0.0025672421557828783,
"signal/accuracy_reward/centered_abs_mean": 0.084222412109375,
"signal/accuracy_reward/group_std_mean": 0.11655709967017173,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421112060546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0421112060546875,
"signal/advantage_abs_mean": 0.048955275863409045,
"signal/advantage_pre_scale_abs_mean": 0.048955275863409045,
"signal/advantage_pre_scale_std": 0.10104106813669204,
"signal/advantage_std": 0.10104106813669204,
"signal/brier_reward/centered_abs_mean": 0.09183044731616974,
"signal/brier_reward/group_std_mean": 0.12157261669635773,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011478805914521217,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011478805914521217,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421579249203205,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03093937486410141,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030269740615040063,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030269740615040063,
"signal/format_reward/centered_abs_mean": 0.000555419921875,
"signal/format_reward/group_std_mean": 0.0013209730386734009,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015749115496873855,
"signal/frontier_aurc_reward/group_std_mean": 0.0027885420713573694,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.819091714627575e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.819091714627575e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12192182391881942,
"signal/frontier_coverage_1/group_std_mean": 0.15978844761848449,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002182400575838983,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002182400575838983,
"signal/frontier_coverage_10/centered_abs_mean": 0.09427153617143631,
"signal/frontier_coverage_10/group_std_mean": 0.12419438064098358,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016874604858458041,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016874604858458041,
"signal/frontier_coverage_15/centered_abs_mean": 0.06523038446903229,
"signal/frontier_coverage_15/group_std_mean": 0.08563594371080399,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011676238849759103,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011676238849759103,
"signal/frontier_coverage_20/centered_abs_mean": 0.05165816843509674,
"signal/frontier_coverage_20/group_std_mean": 0.06644249334931374,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009246811503544449,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009246811503544449,
"signal/frontier_coverage_25/centered_abs_mean": 0.06867350712418556,
"signal/frontier_coverage_25/group_std_mean": 0.09026172012090683,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012292557861655951,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012292557861655951,
"signal/frontier_coverage_5/centered_abs_mean": 0.1216941773891449,
"signal/frontier_coverage_5/group_std_mean": 0.15950067937374116,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021783256670460105,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021783256670460105,
"signal/frontier_ece_reward/centered_abs_mean": 0.00236211777664721,
"signal/frontier_ece_reward/group_std_mean": 0.003097822656854987,
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00029526472208090125,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00029526472208090125,
"step": 305
},
{
"calibration/aurc": 0.27617015723730154,
"calibration/batch_distribution_entropy": 0.8719944630571315,
"calibration/buffer_distribution_entropy": 0.9307879294053564,
"calibration/confidence_entropy": 0.3881166828523776,
"calibration/coverage@0%": 0.15625,
"calibration/coverage@1%": 0.18515625,
"calibration/coverage@10%": 0.31796875,
"calibration/coverage@15%": 0.36171875,
"calibration/coverage@20%": 0.4203125,
"calibration/coverage@25%": 0.48046875,
"calibration/coverage@30%": 0.53515625,
"calibration/coverage@5%": 0.27578125,
"calibration/ece": 0.1480644396551724,
"calibration/mean_confidence": 0.4999269396551724,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 497.4,
"completions/max_terminated_length": 497.4,
"completions/mean_length": 200.55478515625,
"completions/mean_terminated_length": 200.55478515625,
"completions/min_length": 91.2,
"completions/min_terminated_length": 91.2,
"epoch": 0.992,
"grad_norm": 0.0018319895025342703,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 1050998750.0,
"reward": 1.0178974866867065,
"reward_std": 0.062009623646736144,
"rewards/accuracy_reward": 0.571875,
"rewards/brier_reward": 0.818060839176178,
"rewards/confidence_uniqueness_reward": 0.9377853393554687,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0024078495102003218,
"rewards/frontier_coverage_1": 0.13882496058940888,
"rewards/frontier_coverage_10": 0.11529144048690795,
"rewards/frontier_coverage_15": 0.08634113371372223,
"rewards/frontier_coverage_20": 0.08047932088375091,
"rewards/frontier_coverage_25": 0.1226568266749382,
"rewards/frontier_coverage_5": 0.13870886862277984,
"rewards/frontier_ece_reward": 0.0024728897726163266,
"signal/accuracy_reward/centered_abs_mean": 0.08270263671875,
"signal/accuracy_reward/group_std_mean": 0.10843254029750823,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041351318359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.041351318359375,
"signal/advantage_abs_mean": 0.04691413417458534,
"signal/advantage_pre_scale_abs_mean": 0.04691413417458534,
"signal/advantage_pre_scale_std": 0.09592601060867309,
"signal/advantage_std": 0.09592601060867309,
"signal/brier_reward/centered_abs_mean": 0.0934365376830101,
"signal/brier_reward/group_std_mean": 0.12053980976343155,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011679567210376263,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011679567210376263,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.025814294815063477,
"signal/confidence_uniqueness_reward/group_std_mean": 0.03233279511332512,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032267868518829346,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032267868518829346,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001967202941887081,
"signal/frontier_aurc_reward/group_std_mean": 0.0030709158163517714,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.521293183439411e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.521293183439411e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12599806636571884,
"signal/frontier_coverage_1/group_std_mean": 0.16160787940025328,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022553652757778763,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022553652757778763,
"signal/frontier_coverage_10/centered_abs_mean": 0.09844744727015495,
"signal/frontier_coverage_10/group_std_mean": 0.12621570527553558,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001762209297157824,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001762209297157824,
"signal/frontier_coverage_15/centered_abs_mean": 0.06843779757618904,
"signal/frontier_coverage_15/group_std_mean": 0.0877251997590065,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012250364990904928,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012250364990904928,
"signal/frontier_coverage_20/centered_abs_mean": 0.05275077372789383,
"signal/frontier_coverage_20/group_std_mean": 0.06691490858793259,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009442388545721769,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009442388545721769,
"signal/frontier_coverage_25/centered_abs_mean": 0.06476361751556396,
"signal/frontier_coverage_25/group_std_mean": 0.08386294692754745,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011592687340453267,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011592687340453267,
"signal/frontier_coverage_5/centered_abs_mean": 0.12582006603479384,
"signal/frontier_coverage_5/group_std_mean": 0.16137229949235915,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022521790815517306,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022521790815517306,
"signal/frontier_ece_reward/centered_abs_mean": 0.0023749925196170805,
"signal/frontier_ece_reward/group_std_mean": 0.003116936841979623,
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00029687406495213506,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00029687406495213506,
"step": 310
},
{
"calibration/aurc": 0.07394829778523786,
"calibration/batch_distribution_entropy": 0.6858898086044589,
"calibration/buffer_distribution_entropy": 0.9311619746607875,
"calibration/confidence_entropy": 0.34007166230520547,
"calibration/coverage@0%": 0.10546875,
"calibration/coverage@1%": 0.10546875,
"calibration/coverage@10%": 0.802734375,
"calibration/coverage@15%": 0.943359375,
"calibration/coverage@20%": 0.970703125,
"calibration/coverage@25%": 1.0,
"calibration/coverage@30%": 1.0,
"calibration/coverage@5%": 0.5,
"calibration/ece": 0.136876953125,
"calibration/mean_confidence": 0.7878769531250001,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 386.5,
"completions/max_terminated_length": 386.5,
"completions/mean_length": 199.02162170410156,
"completions/mean_terminated_length": 199.02162170410156,
"completions/min_length": 96.0,
"completions/min_terminated_length": 96.0,
"epoch": 0.9984,
"num_tokens": 1057815101.0,
"reward": 1.050271451473236,
"reward_std": 0.06441943719983101,
"rewards/accuracy_reward": 0.645263671875,
"rewards/brier_reward": 0.8100776672363281,
"rewards/confidence_uniqueness_reward": 0.9436569213867188,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.001602545497007668,
"rewards/frontier_coverage_1": 0.07218670099973679,
"rewards/frontier_coverage_10": 0.0615706741809845,
"rewards/frontier_coverage_15": 0.05613754317164421,
"rewards/frontier_coverage_20": 0.06846107542514801,
"rewards/frontier_coverage_25": 0.12920933216810226,
"rewards/frontier_coverage_5": 0.07211882993578911,
"rewards/frontier_ece_reward": 0.0017852028249762952,
"signal/accuracy_reward/centered_abs_mean": 0.0796661376953125,
"signal/accuracy_reward/group_std_mean": 0.11285967007279396,
"signal/accuracy_reward/group_zero_std_frac": 0.6484375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03983306884765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03983306884765625,
"signal/advantage_abs_mean": 0.047435952350497246,
"signal/advantage_pre_scale_abs_mean": 0.047435952350497246,
"signal/advantage_pre_scale_std": 0.09802256524562836,
"signal/advantage_std": 0.09802256524562836,
"signal/brier_reward/centered_abs_mean": 0.10139483213424683,
"signal/brier_reward/group_std_mean": 0.12895793095231056,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012674354016780853,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012674354016780853,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.023676156997680664,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029014757834374905,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002959519624710083,
"signal/confidence_uniqueness_reward/weight": 0.125,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002959519624710083,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0015349971363320947,
"signal/frontier_aurc_reward/group_std_mean": 0.002599976258352399,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.747644975897856e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.747644975897856e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.12439806759357452,
"signal/frontier_coverage_1/group_std_mean": 0.16710513830184937,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002226725220680237,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002226725220680237,
"signal/frontier_coverage_10/centered_abs_mean": 0.09704583883285522,
"signal/frontier_coverage_10/group_std_mean": 0.13068146258592606,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017371204448863864,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017371204448863864,
"signal/frontier_coverage_15/centered_abs_mean": 0.06808548793196678,
"signal/frontier_coverage_15/group_std_mean": 0.0913914144039154,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001218730176333338,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001218730176333338,
"signal/frontier_coverage_20/centered_abs_mean": 0.05189245194196701,
"signal/frontier_coverage_20/group_std_mean": 0.06728483736515045,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009288748260587454,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009288748260587454,
"signal/frontier_coverage_25/centered_abs_mean": 0.06688933074474335,
"signal/frontier_coverage_25/group_std_mean": 0.08471940457820892,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011973190703429282,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011973190703429282,
"signal/frontier_coverage_5/centered_abs_mean": 0.12402944266796112,
"signal/frontier_coverage_5/group_std_mean": 0.16661176830530167,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002220126916654408,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002220126916654408,
"signal/frontier_ece_reward/centered_abs_mean": 0.002541982219554484,
"signal/frontier_ece_reward/group_std_mean": 0.0034003107575699687,
"signal/frontier_ece_reward/group_zero_std_frac": 0.03125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003177477774443105,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003177477774443105,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.00456765069126656,
"train_runtime": 31094.48,
"train_samples_per_second": 0.643,
"train_steps_per_second": 0.01
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1057815101,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}