{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.574161369417126, "calibration/batch_distribution_entropy": 0.6217632380850391, "calibration/confidence_entropy": 0.3449140549111297, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.07282051282051281, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47335296160366747, "calibration/mean_confidence": 0.8051669529651726, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0361328125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1501.2, "completions/mean_length": 268.3080078125, "completions/mean_terminated_length": 220.78201293945312, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.11463230848312378, "learning_rate": 3.1249999999999997e-07, "loss": 0.0912, "num_tokens": 17591506.0, "reward": 0.6744200468063355, "reward_std": 0.49649240970611574, "rewards/accuracy_reward": 0.26630859375, "rewards/brier_reward": 0.4115479052066803, "rewards/confidence_uniqueness_reward": 0.4812490105628967, "rewards/format_reward": 0.68798828125, "rewards/frontier_aurc_reward": 0.3422773241996765, "rewards/frontier_coverage_1": 0.3422773241996765, "rewards/frontier_coverage_10": 0.3422773241996765, "rewards/frontier_coverage_15": 0.3422773241996765, "rewards/frontier_coverage_20": 0.3422773241996765, "rewards/frontier_coverage_25": 0.3422773241996765, "rewards/frontier_coverage_5": 0.3422773241996765, "rewards/frontier_ece_reward": 0.3422773241996765, "signal/accuracy_reward/centered_abs_mean": 0.274066162109375, "signal/accuracy_reward/group_std_mean": 0.31360672116279603, "signal/accuracy_reward/group_zero_std_frac": 0.26875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1370330810546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1370330810546875, "signal/advantage_abs_mean": 0.42605471014976504, "signal/advantage_pre_scale_abs_mean": 0.42605471014976504, "signal/advantage_pre_scale_std": 0.5046224594116211, "signal/advantage_std": 0.5046224594116211, "signal/brier_reward/centered_abs_mean": 0.33465067148208616, "signal/brier_reward/group_std_mean": 0.3789239704608917, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04183133393526077, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04183133393526077, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2928457796573639, "signal/confidence_uniqueness_reward/group_std_mean": 0.34470821022987364, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03660572245717049, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03660572245717049, "signal/format_reward/centered_abs_mean": 0.394317626953125, "signal/format_reward/group_std_mean": 0.4479940414428711, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1971588134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1971588134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.31442518830299376, "signal/frontier_aurc_reward/group_std_mean": 0.3627101004123688, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_1/centered_abs_mean": 0.31442518830299376, "signal/frontier_coverage_1/group_std_mean": 0.3627101004123688, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_10/centered_abs_mean": 0.31442518830299376, "signal/frontier_coverage_10/group_std_mean": 0.3627101004123688, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_15/centered_abs_mean": 0.31442518830299376, "signal/frontier_coverage_15/group_std_mean": 0.3627101004123688, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_20/centered_abs_mean": 0.31442518830299376, "signal/frontier_coverage_20/group_std_mean": 0.3627101004123688, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_25/centered_abs_mean": 0.31442518830299376, "signal/frontier_coverage_25/group_std_mean": 0.3627101004123688, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_5/centered_abs_mean": 0.31442518830299376, "signal/frontier_coverage_5/group_std_mean": 0.3627101004123688, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005628210585564375, "signal/frontier_ece_reward/centered_abs_mean": 0.31442518830299376, "signal/frontier_ece_reward/group_std_mean": 0.3627101004123688, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03930314853787422, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03930314853787422, "step": 5 }, { "calibration/aurc": 0.6252603742483307, "calibration/batch_distribution_entropy": 0.6553918903963509, "calibration/confidence_entropy": 0.3561837908236652, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4639029824341743, "calibration/mean_confidence": 0.7835663529333339, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03310546875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1525.8, "completions/mean_length": 253.1953125, "completions/mean_terminated_length": 209.34018249511718, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.038865186274051666, "learning_rate": 6.249999999999999e-07, "loss": 0.0913, "num_tokens": 35284578.0, "reward": 0.7065129756927491, "reward_std": 0.4615809082984924, "rewards/accuracy_reward": 0.25361328125, "rewards/brier_reward": 0.4207825243473053, "rewards/confidence_uniqueness_reward": 0.5312160611152649, "rewards/format_reward": 0.75029296875, "rewards/frontier_aurc_reward": 0.3418300449848175, "rewards/frontier_coverage_1": 0.3418300449848175, "rewards/frontier_coverage_10": 0.3418300449848175, "rewards/frontier_coverage_15": 0.3418300449848175, "rewards/frontier_coverage_20": 0.3418300449848175, "rewards/frontier_coverage_25": 0.3418300449848175, "rewards/frontier_coverage_5": 0.3418300449848175, "rewards/frontier_ece_reward": 0.3418300449848175, "signal/accuracy_reward/centered_abs_mean": 0.252484130859375, "signal/accuracy_reward/group_std_mean": 0.3011133372783661, "signal/accuracy_reward/group_zero_std_frac": 0.259375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1262420654296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1262420654296875, "signal/advantage_abs_mean": 0.37834325432777405, "signal/advantage_pre_scale_abs_mean": 0.37834325432777405, "signal/advantage_pre_scale_std": 0.470405113697052, "signal/advantage_std": 0.470405113697052, "signal/brier_reward/centered_abs_mean": 0.3180624425411224, "signal/brier_reward/group_std_mean": 0.36653814315795896, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0397578053176403, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0397578053176403, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.26791125535964966, "signal/confidence_uniqueness_reward/group_std_mean": 0.3284755825996399, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03348890691995621, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03348890691995621, "signal/format_reward/centered_abs_mean": 0.348736572265625, "signal/format_reward/group_std_mean": 0.4197552680969238, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1743682861328125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1743682861328125, "signal/frontier_aurc_reward/centered_abs_mean": 0.2994271457195282, "signal/frontier_aurc_reward/group_std_mean": 0.35230074524879457, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_1/centered_abs_mean": 0.2994271457195282, "signal/frontier_coverage_1/group_std_mean": 0.35230074524879457, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_10/centered_abs_mean": 0.2994271457195282, "signal/frontier_coverage_10/group_std_mean": 0.35230074524879457, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_15/centered_abs_mean": 0.2994271457195282, "signal/frontier_coverage_15/group_std_mean": 0.35230074524879457, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_20/centered_abs_mean": 0.2994271457195282, "signal/frontier_coverage_20/group_std_mean": 0.35230074524879457, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_25/centered_abs_mean": 0.2994271457195282, "signal/frontier_coverage_25/group_std_mean": 0.35230074524879457, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_5/centered_abs_mean": 0.2994271457195282, "signal/frontier_coverage_5/group_std_mean": 0.35230074524879457, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00535974558442831, "signal/frontier_ece_reward/centered_abs_mean": 0.2994271457195282, "signal/frontier_ece_reward/group_std_mean": 0.35230074524879457, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03742839321494103, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03742839321494103, "step": 10 }, { "calibration/aurc": 0.5063660563094874, "calibration/batch_distribution_entropy": 0.6456137890333957, "calibration/confidence_entropy": 0.3523480270306522, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.40836266399469745, "calibration/mean_confidence": 0.803510317425796, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0111328125, "completions/max_length": 1536.0, "completions/max_terminated_length": 1402.4, "completions/mean_length": 187.18408203125, "completions/mean_terminated_length": 172.0759704589844, "completions/min_length": 8.8, "completions/min_terminated_length": 8.8, "epoch": 0.048, "grad_norm": 0.3801707625389099, "learning_rate": 9.374999999999999e-07, "loss": 0.0454, "num_tokens": 52250079.0, "reward": 0.874800705909729, "reward_std": 0.349214905500412, "rewards/accuracy_reward": 0.3330078125, "rewards/brier_reward": 0.5289363861083984, "rewards/confidence_uniqueness_reward": 0.6511791348457336, "rewards/format_reward": 0.90400390625, "rewards/frontier_aurc_reward": 0.434600293636322, "rewards/frontier_coverage_1": 0.434600293636322, "rewards/frontier_coverage_10": 0.434600293636322, "rewards/frontier_coverage_15": 0.434600293636322, "rewards/frontier_coverage_20": 0.434600293636322, "rewards/frontier_coverage_25": 0.434600293636322, "rewards/frontier_coverage_5": 0.434600293636322, "rewards/frontier_ece_reward": 0.434600293636322, "signal/accuracy_reward/centered_abs_mean": 0.21435546875, "signal/accuracy_reward/group_std_mean": 0.2680795192718506, "signal/accuracy_reward/group_zero_std_frac": 0.3, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.107177734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.107177734375, "signal/advantage_abs_mean": 0.2617394238710403, "signal/advantage_pre_scale_abs_mean": 0.2617394238710403, "signal/advantage_pre_scale_std": 0.3631041467189789, "signal/advantage_std": 0.3631041467189789, "signal/brier_reward/centered_abs_mean": 0.26632643938064576, "signal/brier_reward/group_std_mean": 0.32310367822647096, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03329080492258072, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03329080492258072, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.18657545149326324, "signal/confidence_uniqueness_reward/group_std_mean": 0.24681947529315948, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023321931436657905, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023321931436657905, "signal/format_reward/centered_abs_mean": 0.161785888671875, "signal/format_reward/group_std_mean": 0.25919924676418304, "signal/format_reward/group_zero_std_frac": 0.125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0808929443359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0808929443359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.26505063772201537, "signal/frontier_aurc_reward/group_std_mean": 0.3231291711330414, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_1/centered_abs_mean": 0.26505063772201537, "signal/frontier_coverage_1/group_std_mean": 0.3231291711330414, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_10/centered_abs_mean": 0.26505063772201537, "signal/frontier_coverage_10/group_std_mean": 0.3231291711330414, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_15/centered_abs_mean": 0.26505063772201537, "signal/frontier_coverage_15/group_std_mean": 0.3231291711330414, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_20/centered_abs_mean": 0.26505063772201537, "signal/frontier_coverage_20/group_std_mean": 0.3231291711330414, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_25/centered_abs_mean": 0.26505063772201537, "signal/frontier_coverage_25/group_std_mean": 0.3231291711330414, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_5/centered_abs_mean": 0.26505063772201537, "signal/frontier_coverage_5/group_std_mean": 0.3231291711330414, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0047444062307477, "signal/frontier_ece_reward/centered_abs_mean": 0.26505063772201537, "signal/frontier_ece_reward/group_std_mean": 0.3231291711330414, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03313132971525192, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03313132971525192, "step": 15 }, { "calibration/aurc": 0.4569322433246872, "calibration/batch_distribution_entropy": 0.701901124719379, "calibration/confidence_entropy": 0.368413807477986, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.14661354581673308, "calibration/coverage@30%": 0.26693227091633465, "calibration/coverage@5%": 0.0, "calibration/ece": 0.31210808530583545, "calibration/mean_confidence": 0.7689342707600968, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00302734375, "completions/max_length": 1536.0, "completions/max_terminated_length": 980.0, "completions/mean_length": 133.61025390625, "completions/mean_terminated_length": 129.3588897705078, "completions/min_length": 26.4, "completions/min_terminated_length": 26.4, "epoch": 0.064, "grad_norm": 0.03475378826260567, "learning_rate": 1e-06, "loss": 0.0134, "num_tokens": 68536648.0, "reward": 0.9799639225006104, "reward_std": 0.26246568858623504, "rewards/accuracy_reward": 0.39072265625, "rewards/brier_reward": 0.6048341035842896, "rewards/confidence_uniqueness_reward": 0.738007652759552, "rewards/format_reward": 0.98427734375, "rewards/frontier_aurc_reward": 0.4978376030921936, "rewards/frontier_coverage_1": 0.4978376030921936, "rewards/frontier_coverage_10": 0.4978376030921936, "rewards/frontier_coverage_15": 0.4978376030921936, "rewards/frontier_coverage_20": 0.4978376030921936, "rewards/frontier_coverage_25": 0.4978376030921936, "rewards/frontier_coverage_5": 0.4978376030921936, "rewards/frontier_ece_reward": 0.4978376030921936, "signal/accuracy_reward/centered_abs_mean": 0.214776611328125, "signal/accuracy_reward/group_std_mean": 0.2681283473968506, "signal/accuracy_reward/group_zero_std_frac": 0.296875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1073883056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1073883056640625, "signal/advantage_abs_mean": 0.20265749394893645, "signal/advantage_pre_scale_abs_mean": 0.20265749394893645, "signal/advantage_pre_scale_std": 0.2796101540327072, "signal/advantage_std": 0.2796101540327072, "signal/brier_reward/centered_abs_mean": 0.23721030354499817, "signal/brier_reward/group_std_mean": 0.2935959815979004, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02965128794312477, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02965128794312477, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12453396171331406, "signal/confidence_uniqueness_reward/group_std_mean": 0.15769868493080139, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015566745214164257, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015566745214164257, "signal/format_reward/centered_abs_mean": 0.029925537109375, "signal/format_reward/group_std_mean": 0.07638685405254364, "signal/format_reward/group_zero_std_frac": 0.609375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0149627685546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0149627685546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.2561593323945999, "signal/frontier_aurc_reward/group_std_mean": 0.3122838795185089, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_1/centered_abs_mean": 0.2561593323945999, "signal/frontier_coverage_1/group_std_mean": 0.3122838795185089, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_10/centered_abs_mean": 0.2561593323945999, "signal/frontier_coverage_10/group_std_mean": 0.3122838795185089, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_15/centered_abs_mean": 0.2561593323945999, "signal/frontier_coverage_15/group_std_mean": 0.3122838795185089, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_20/centered_abs_mean": 0.2561593323945999, "signal/frontier_coverage_20/group_std_mean": 0.3122838795185089, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_25/centered_abs_mean": 0.2561593323945999, "signal/frontier_coverage_25/group_std_mean": 0.3122838795185089, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_5/centered_abs_mean": 0.2561593323945999, "signal/frontier_coverage_5/group_std_mean": 0.3122838795185089, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0045852516777813435, "signal/frontier_ece_reward/centered_abs_mean": 0.2561593323945999, "signal/frontier_ece_reward/group_std_mean": 0.3122838795185089, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03201991654932499, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03201991654932499, "step": 20 }, { "calibration/aurc": 0.5084255341898481, "calibration/batch_distribution_entropy": 0.8164247601318154, "calibration/buffer_distribution_entropy": 0.7090379807345808, "calibration/confidence_entropy": 0.45600894852795226, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.029133858267716535, "calibration/coverage@30%": 0.07401574803149606, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3110133148570779, "calibration/mean_confidence": 0.6897827998864032, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 1409.6, "completions/max_terminated_length": 799.0, "completions/mean_length": 118.04287109375, "completions/mean_terminated_length": 116.93400421142579, "completions/min_length": 38.8, "completions/min_terminated_length": 38.8, "epoch": 0.08, "grad_norm": 0.00825721025466919, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 84678559.0, "reward": 0.9916581392288208, "reward_std": 0.19244107306003572, "rewards/accuracy_reward": 0.4255859375, "rewards/brier_reward": 0.6843536019325256, "rewards/confidence_uniqueness_reward": 0.817476212978363, "rewards/format_reward": 0.996484375, "rewards/frontier_aurc_reward": 0.3578193149529397, "rewards/frontier_coverage_1": 0.3889893189072609, "rewards/frontier_coverage_10": 0.3889893189072609, "rewards/frontier_coverage_15": 0.3889893189072609, "rewards/frontier_coverage_20": 0.3889893189072609, "rewards/frontier_coverage_25": 0.3889893189072609, "rewards/frontier_coverage_5": 0.3889893189072609, "rewards/frontier_ece_reward": 0.3576948957517743, "signal/accuracy_reward/centered_abs_mean": 0.18526611328125, "signal/accuracy_reward/group_std_mean": 0.23501957356929778, "signal/accuracy_reward/group_zero_std_frac": 0.36875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.092633056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.092633056640625, "signal/advantage_abs_mean": 0.1510450452566147, "signal/advantage_pre_scale_abs_mean": 0.1510450452566147, "signal/advantage_pre_scale_std": 0.21218505203723909, "signal/advantage_std": 0.21218505203723909, "signal/brier_reward/centered_abs_mean": 0.2015215128660202, "signal/brier_reward/group_std_mean": 0.25240307450294497, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025190189108252527, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.025190189108252527, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08064173310995101, "signal/confidence_uniqueness_reward/group_std_mean": 0.10429088771343231, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010080216638743877, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010080216638743877, "signal/format_reward/centered_abs_mean": 0.00677490234375, "signal/format_reward/group_std_mean": 0.01887845266610384, "signal/format_reward/group_zero_std_frac": 0.896875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003387451171875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003387451171875, "signal/frontier_aurc_reward/centered_abs_mean": 0.1544154985807836, "signal/frontier_aurc_reward/group_std_mean": 0.18873186707496642, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.002764037343149539, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.002764037343149539, "signal/frontier_coverage_1/centered_abs_mean": 0.21573287844657899, "signal/frontier_coverage_1/group_std_mean": 0.27374354004859924, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_10/centered_abs_mean": 0.21573287844657899, "signal/frontier_coverage_10/group_std_mean": 0.27374354004859924, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_15/centered_abs_mean": 0.21573287844657899, "signal/frontier_coverage_15/group_std_mean": 0.27374354004859924, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_20/centered_abs_mean": 0.21573287844657899, "signal/frontier_coverage_20/group_std_mean": 0.27374354004859924, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_25/centered_abs_mean": 0.21573287844657899, "signal/frontier_coverage_25/group_std_mean": 0.27374354004859924, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_5/centered_abs_mean": 0.21573287844657899, "signal/frontier_coverage_5/group_std_mean": 0.27374354004859924, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038616183679550885, "signal/frontier_ece_reward/centered_abs_mean": 0.1824594885110855, "signal/frontier_ece_reward/group_std_mean": 0.22269095629453659, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02280743606388569, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02280743606388569, "step": 25 }, { "calibration/aurc": 0.6451447340341319, "calibration/batch_distribution_entropy": 0.8836254438118136, "calibration/buffer_distribution_entropy": 0.7536336258740739, "calibration/confidence_entropy": 0.5477458249391652, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.00234375, "calibration/coverage@15%": 0.00234375, "calibration/coverage@20%": 0.00234375, "calibration/coverage@25%": 0.003125, "calibration/coverage@30%": 0.003125, "calibration/coverage@5%": 0.00234375, "calibration/ece": 0.2766606577437428, "calibration/mean_confidence": 0.5350011617572232, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 864.6, "completions/max_terminated_length": 464.4, "completions/mean_length": 126.6078125, "completions/mean_terminated_length": 126.33264770507813, "completions/min_length": 41.4, "completions/min_terminated_length": 41.4, "epoch": 0.096, "grad_norm": 0.007818322628736496, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 101019631.0, "reward": 0.9390641927719117, "reward_std": 0.1480298787355423, "rewards/accuracy_reward": 0.4728515625, "rewards/brier_reward": 0.7311343312263489, "rewards/confidence_uniqueness_reward": 0.8339264154434204, "rewards/format_reward": 0.99755859375, "rewards/frontier_aurc_reward": -0.004495029617100954, "rewards/frontier_coverage_1": 0.06669748276472091, "rewards/frontier_coverage_10": 0.06669748276472091, "rewards/frontier_coverage_15": 0.06669748276472091, "rewards/frontier_coverage_20": 0.06669748276472091, "rewards/frontier_coverage_25": 0.06669748276472091, "rewards/frontier_coverage_5": 0.06669748276472091, "rewards/frontier_ece_reward": 0.009149301517754792, "signal/accuracy_reward/centered_abs_mean": 0.17642822265625, "signal/accuracy_reward/group_std_mean": 0.22929745614528657, "signal/accuracy_reward/group_zero_std_frac": 0.365625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.088214111328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.088214111328125, "signal/advantage_abs_mean": 0.11449322551488876, "signal/advantage_pre_scale_abs_mean": 0.11449322551488876, "signal/advantage_pre_scale_std": 0.16401045322418212, "signal/advantage_std": 0.16401045322418212, "signal/brier_reward/centered_abs_mean": 0.18242722153663635, "signal/brier_reward/group_std_mean": 0.23005988895893098, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022803402692079543, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022803402692079543, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08953404575586318, "signal/confidence_uniqueness_reward/group_std_mean": 0.1142925649881363, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011191755719482898, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011191755719482898, "signal/format_reward/centered_abs_mean": 0.004730224609375, "signal/format_reward/group_std_mean": 0.013810678757727146, "signal/format_reward/group_zero_std_frac": 0.921875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0023651123046875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0023651123046875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024468526942655446, "signal/frontier_aurc_reward/group_std_mean": 0.003993393434211612, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.379866222734563e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.379866222734563e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1959122210741043, "signal/frontier_coverage_1/group_std_mean": 0.26175145506858827, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_10/centered_abs_mean": 0.1959122210741043, "signal/frontier_coverage_10/group_std_mean": 0.26175145506858827, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_15/centered_abs_mean": 0.1959122210741043, "signal/frontier_coverage_15/group_std_mean": 0.26175145506858827, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_20/centered_abs_mean": 0.1959122210741043, "signal/frontier_coverage_20/group_std_mean": 0.26175145506858827, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_25/centered_abs_mean": 0.1959122210741043, "signal/frontier_coverage_25/group_std_mean": 0.26175145506858827, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_5/centered_abs_mean": 0.1959122210741043, "signal/frontier_coverage_5/group_std_mean": 0.26175145506858827, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003506828611716628, "signal/frontier_ece_reward/centered_abs_mean": 0.06938310116529464, "signal/frontier_ece_reward/group_std_mean": 0.08558708280324936, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00867288764566183, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00867288764566183, "step": 30 }, { "calibration/aurc": 0.3086294818860637, "calibration/batch_distribution_entropy": 0.8961884358507668, "calibration/buffer_distribution_entropy": 0.8133524081442107, "calibration/confidence_entropy": 0.5430245461216926, "calibration/coverage@0%": 0.009375, "calibration/coverage@1%": 0.009375, "calibration/coverage@10%": 0.06875, "calibration/coverage@15%": 0.10546875, "calibration/coverage@20%": 0.19140625, "calibration/coverage@25%": 0.37265625, "calibration/coverage@30%": 0.53671875, "calibration/coverage@5%": 0.025, "calibration/ece": 0.15086028370235913, "calibration/mean_confidence": 0.46044110662133236, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1082.4, "completions/max_terminated_length": 397.4, "completions/mean_length": 135.834375, "completions/mean_terminated_length": 135.4240753173828, "completions/min_length": 49.2, "completions/min_terminated_length": 49.2, "epoch": 0.112, "grad_norm": 0.004440602846443653, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 117520047.0, "reward": 0.9525650858879089, "reward_std": 0.12223374545574188, "rewards/accuracy_reward": 0.48525390625, "rewards/brier_reward": 0.7479133129119873, "rewards/confidence_uniqueness_reward": 0.8512405276298523, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.003925839858129621, "rewards/frontier_coverage_1": 0.08381552398204803, "rewards/frontier_coverage_10": 0.08381552398204803, "rewards/frontier_coverage_15": 0.08381552398204803, "rewards/frontier_coverage_20": 0.08381552398204803, "rewards/frontier_coverage_25": 0.08381552398204803, "rewards/frontier_coverage_5": 0.08381552398204803, "rewards/frontier_ece_reward": 0.012414590083062648, "signal/accuracy_reward/centered_abs_mean": 0.167669677734375, "signal/accuracy_reward/group_std_mean": 0.21841561794281006, "signal/accuracy_reward/group_zero_std_frac": 0.390625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0838348388671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0838348388671875, "signal/advantage_abs_mean": 0.09548963457345963, "signal/advantage_pre_scale_abs_mean": 0.09548963457345963, "signal/advantage_pre_scale_std": 0.13613282144069672, "signal/advantage_std": 0.13613282144069672, "signal/brier_reward/centered_abs_mean": 0.1708603948354721, "signal/brier_reward/group_std_mean": 0.21544656455516814, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021357549354434013, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021357549354434013, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08106829673051834, "signal/confidence_uniqueness_reward/group_std_mean": 0.09855363517999649, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010133537091314792, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010133537091314792, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016749128932133316, "signal/frontier_aurc_reward/group_std_mean": 0.002730554435402155, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.998093877977226e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.998093877977226e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.24808040857315064, "signal/frontier_coverage_1/group_std_mean": 0.3155758440494537, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_10/centered_abs_mean": 0.24808040857315064, "signal/frontier_coverage_10/group_std_mean": 0.3155758440494537, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_15/centered_abs_mean": 0.24808040857315064, "signal/frontier_coverage_15/group_std_mean": 0.3155758440494537, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_20/centered_abs_mean": 0.24808040857315064, "signal/frontier_coverage_20/group_std_mean": 0.3155758440494537, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_25/centered_abs_mean": 0.24808040857315064, "signal/frontier_coverage_25/group_std_mean": 0.3155758440494537, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_5/centered_abs_mean": 0.24808040857315064, "signal/frontier_coverage_5/group_std_mean": 0.3155758440494537, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0044406389351934195, "signal/frontier_ece_reward/centered_abs_mean": 0.04592524915933609, "signal/frontier_ece_reward/group_std_mean": 0.06059465631842613, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005740656144917011, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005740656144917011, "step": 35 }, { "calibration/aurc": 0.4580176944367551, "calibration/batch_distribution_entropy": 0.8535035360352362, "calibration/buffer_distribution_entropy": 0.8659183215624904, "calibration/confidence_entropy": 0.519083178935934, "calibration/coverage@0%": 0.007815563725490197, "calibration/coverage@1%": 0.007815563725490197, "calibration/coverage@10%": 0.03203431372549019, "calibration/coverage@15%": 0.06328431372549019, "calibration/coverage@20%": 0.11484681372549019, "calibration/coverage@25%": 0.1296905637254902, "calibration/coverage@30%": 0.2289093137254902, "calibration/coverage@5%": 0.007815563725490197, "calibration/ece": 0.1627333490117647, "calibration/mean_confidence": 0.3436108605470588, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 912.4, "completions/max_terminated_length": 448.8, "completions/mean_length": 149.380859375, "completions/mean_terminated_length": 149.11015625, "completions/min_length": 65.4, "completions/min_terminated_length": 65.4, "epoch": 0.128, "grad_norm": 0.003327795770019293, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 133966379.0, "reward": 0.9490593194961547, "reward_std": 0.1036192610859871, "rewards/accuracy_reward": 0.47001953125, "rewards/brier_reward": 0.7549473881721497, "rewards/confidence_uniqueness_reward": 0.8515770912170411, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0035853940062224865, "rewards/frontier_coverage_1": 0.11151133924722671, "rewards/frontier_coverage_10": 0.11151133924722671, "rewards/frontier_coverage_15": 0.11151133924722671, "rewards/frontier_coverage_20": 0.11151133924722671, "rewards/frontier_coverage_25": 0.11151133924722671, "rewards/frontier_coverage_5": 0.11151133924722671, "rewards/frontier_ece_reward": 0.013309185951948166, "signal/accuracy_reward/centered_abs_mean": 0.142132568359375, "signal/accuracy_reward/group_std_mean": 0.19049813449382783, "signal/accuracy_reward/group_zero_std_frac": 0.446875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0710662841796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0710662841796875, "signal/advantage_abs_mean": 0.08017075657844544, "signal/advantage_pre_scale_abs_mean": 0.08017075657844544, "signal/advantage_pre_scale_std": 0.11746061593294144, "signal/advantage_std": 0.11746061593294144, "signal/brier_reward/centered_abs_mean": 0.15917613804340364, "signal/brier_reward/group_std_mean": 0.20062560141086577, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019897017255425455, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019897017255425455, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08653065264225006, "signal/confidence_uniqueness_reward/group_std_mean": 0.11002808213233947, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010816331580281257, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010816331580281257, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.0038669900968670845, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012066281167790293, "signal/frontier_aurc_reward/group_std_mean": 0.0019432639004662634, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1598641978926025e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1598641978926025e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2550701230764389, "signal/frontier_coverage_1/group_std_mean": 0.3213431596755981, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_10/centered_abs_mean": 0.2550701230764389, "signal/frontier_coverage_10/group_std_mean": 0.3213431596755981, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_15/centered_abs_mean": 0.2550701230764389, "signal/frontier_coverage_15/group_std_mean": 0.3213431596755981, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_20/centered_abs_mean": 0.2550701230764389, "signal/frontier_coverage_20/group_std_mean": 0.3213431596755981, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_25/centered_abs_mean": 0.2550701230764389, "signal/frontier_coverage_25/group_std_mean": 0.3213431596755981, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_5/centered_abs_mean": 0.2550701230764389, "signal/frontier_coverage_5/group_std_mean": 0.3213431596755981, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004565754998475313, "signal/frontier_ece_reward/centered_abs_mean": 0.03292861394584179, "signal/frontier_ece_reward/group_std_mean": 0.04621725678443909, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004116076743230224, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004116076743230224, "step": 40 }, { "calibration/aurc": 0.2264348749109058, "calibration/batch_distribution_entropy": 0.947362197850653, "calibration/buffer_distribution_entropy": 0.9023169704024638, "calibration/confidence_entropy": 0.5164703875793273, "calibration/coverage@0%": 0.0375, "calibration/coverage@1%": 0.0375, "calibration/coverage@10%": 0.14453125, "calibration/coverage@15%": 0.29140625, "calibration/coverage@20%": 0.46796875, "calibration/coverage@25%": 0.7109375, "calibration/coverage@30%": 0.76171875, "calibration/coverage@5%": 0.06953125, "calibration/ece": 0.293532578125, "calibration/mean_confidence": 0.41213460937499996, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1085.0, "completions/max_terminated_length": 383.8, "completions/mean_length": 155.62353515625, "completions/mean_terminated_length": 155.21868896484375, "completions/min_length": 67.2, "completions/min_terminated_length": 67.2, "epoch": 0.144, "grad_norm": 0.0031078618485480547, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 150510396.0, "reward": 1.000108528137207, "reward_std": 0.10603977590799332, "rewards/accuracy_reward": 0.58642578125, "rewards/brier_reward": 0.7386995673179626, "rewards/confidence_uniqueness_reward": 0.8767276644706726, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0029010240454226733, "rewards/frontier_coverage_1": 0.026493354281410576, "rewards/frontier_coverage_10": 0.026493354281410576, "rewards/frontier_coverage_15": 0.026493354281410576, "rewards/frontier_coverage_20": 0.026493354281410576, "rewards/frontier_coverage_25": 0.026493354281410576, "rewards/frontier_coverage_5": 0.026493354281410576, "rewards/frontier_ece_reward": 0.018952517956495284, "signal/accuracy_reward/centered_abs_mean": 0.150006103515625, "signal/accuracy_reward/group_std_mean": 0.19647997319698335, "signal/accuracy_reward/group_zero_std_frac": 0.446875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0750030517578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0750030517578125, "signal/advantage_abs_mean": 0.08291901051998138, "signal/advantage_pre_scale_abs_mean": 0.08291901051998138, "signal/advantage_pre_scale_std": 0.11942969560623169, "signal/advantage_std": 0.11942969560623169, "signal/brier_reward/centered_abs_mean": 0.17461107671260834, "signal/brier_reward/group_std_mean": 0.21828512847423553, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021826384589076042, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021826384589076042, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06542369574308396, "signal/confidence_uniqueness_reward/group_std_mean": 0.08328969031572342, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008177961967885495, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008177961967885495, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014980694744735957, "signal/frontier_aurc_reward/group_std_mean": 0.0023212187923491003, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6815443561645225e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6815443561645225e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2612448215484619, "signal/frontier_coverage_1/group_std_mean": 0.3272443234920502, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_10/centered_abs_mean": 0.2612448215484619, "signal/frontier_coverage_10/group_std_mean": 0.3272443234920502, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_15/centered_abs_mean": 0.2612448215484619, "signal/frontier_coverage_15/group_std_mean": 0.3272443234920502, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_20/centered_abs_mean": 0.2612448215484619, "signal/frontier_coverage_20/group_std_mean": 0.3272443234920502, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_25/centered_abs_mean": 0.2612448215484619, "signal/frontier_coverage_25/group_std_mean": 0.3272443234920502, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_5/centered_abs_mean": 0.2612448215484619, "signal/frontier_coverage_5/group_std_mean": 0.3272443234920502, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004676282219588756, "signal/frontier_ece_reward/centered_abs_mean": 0.039357250183820726, "signal/frontier_ece_reward/group_std_mean": 0.05260428786277771, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004919656272977591, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004919656272977591, "step": 45 }, { "calibration/aurc": 0.33544708568653764, "calibration/batch_distribution_entropy": 0.9641692497219696, "calibration/buffer_distribution_entropy": 0.9233838203150402, "calibration/confidence_entropy": 0.4748932057775385, "calibration/coverage@0%": 0.0046875, "calibration/coverage@1%": 0.0046875, "calibration/coverage@10%": 0.03671875, "calibration/coverage@15%": 0.15174938725490197, "calibration/coverage@20%": 0.2479810049019608, "calibration/coverage@25%": 0.2894546568627451, "calibration/coverage@30%": 0.34893688725490196, "calibration/coverage@5%": 0.0046875, "calibration/ece": 0.14829035447303923, "calibration/mean_confidence": 0.46134567493872547, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 742.4, "completions/max_terminated_length": 523.4, "completions/mean_length": 163.4974609375, "completions/mean_terminated_length": 163.36328125, "completions/min_length": 75.8, "completions/min_terminated_length": 75.8, "epoch": 0.16, "grad_norm": 0.0028037051670253277, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 167205538.0, "reward": 0.9897387862205506, "reward_std": 0.11076341718435287, "rewards/accuracy_reward": 0.5359375, "rewards/brier_reward": 0.7786368131637573, "rewards/confidence_uniqueness_reward": 0.88655526638031, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.003066997462883592, "rewards/frontier_coverage_1": 0.09699134379625321, "rewards/frontier_coverage_10": 0.09699134379625321, "rewards/frontier_coverage_15": 0.09699134379625321, "rewards/frontier_coverage_20": 0.09699134379625321, "rewards/frontier_coverage_25": 0.09699134379625321, "rewards/frontier_coverage_5": 0.09699134379625321, "rewards/frontier_ece_reward": 0.026462964341044427, "signal/accuracy_reward/centered_abs_mean": 0.14306640625, "signal/accuracy_reward/group_std_mean": 0.1878939002752304, "signal/accuracy_reward/group_zero_std_frac": 0.4625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071533203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.071533203125, "signal/advantage_abs_mean": 0.08609444797039031, "signal/advantage_pre_scale_abs_mean": 0.08609444797039031, "signal/advantage_pre_scale_std": 0.12765211164951323, "signal/advantage_std": 0.12765211164951323, "signal/brier_reward/centered_abs_mean": 0.16862273216247559, "signal/brier_reward/group_std_mean": 0.2126835286617279, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021077841520309448, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021077841520309448, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.056052202731370925, "signal/confidence_uniqueness_reward/group_std_mean": 0.06837449967861176, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070065253414213656, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070065253414213656, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002428090269677341, "signal/frontier_aurc_reward/group_std_mean": 0.0038595238234847783, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.346281566540711e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.346281566540711e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22630979716777802, "signal/frontier_coverage_1/group_std_mean": 0.2893765389919281, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_10/centered_abs_mean": 0.22630979716777802, "signal/frontier_coverage_10/group_std_mean": 0.2893765389919281, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_15/centered_abs_mean": 0.22630979716777802, "signal/frontier_coverage_15/group_std_mean": 0.2893765389919281, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_20/centered_abs_mean": 0.22630979716777802, "signal/frontier_coverage_20/group_std_mean": 0.2893765389919281, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_25/centered_abs_mean": 0.22630979716777802, "signal/frontier_coverage_25/group_std_mean": 0.2893765389919281, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_5/centered_abs_mean": 0.22630979716777802, "signal/frontier_coverage_5/group_std_mean": 0.2893765389919281, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004050945350900293, "signal/frontier_ece_reward/centered_abs_mean": 0.04531662836670876, "signal/frontier_ece_reward/group_std_mean": 0.058810415863990786, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005664578545838595, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005664578545838595, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.4997777285356147, "eval_calibration/batch_distribution_entropy": 0.9348213202225528, "eval_calibration/buffer_distribution_entropy": 0.9305712700671265, "eval_calibration/confidence_entropy": 0.47102110753245985, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.046875, "eval_calibration/coverage@25%": 0.0625, "eval_calibration/coverage@30%": 0.234375, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.281164314516129, "eval_calibration/mean_confidence": 0.5592389112903225, "eval_completions/clipped_ratio": 0.002049180327868827, "eval_completions/max_length": 911.5, "eval_completions/max_terminated_length": 311.5, "eval_completions/mean_length": 170.20043182373047, "eval_completions/mean_terminated_length": 167.39978790283203, "eval_completions/min_length": 84.0, "eval_completions/min_terminated_length": 84.0, "eval_loss": 0.0, "eval_num_tokens": 167205538.0, "eval_reward": 0.90069180727005, "eval_reward_std": 0.23488686978816986, "eval_rewards/accuracy_reward": 0.361328125, "eval_rewards/brier_reward": 0.7528277337551117, "eval_rewards/confidence_uniqueness_reward": 0.8379772901535034, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.005042638164013624, "eval_rewards/frontier_coverage_1": 0.18974924087524414, "eval_rewards/frontier_coverage_10": 0.18974924087524414, "eval_rewards/frontier_coverage_15": 0.18974924087524414, "eval_rewards/frontier_coverage_20": 0.18974924087524414, "eval_rewards/frontier_coverage_25": 0.18974924087524414, "eval_rewards/frontier_coverage_5": 0.18974924087524414, "eval_rewards/frontier_ece_reward": 0.01491912454366684, "eval_runtime": 19.3233, "eval_samples_per_second": 25.876, "eval_signal/accuracy_reward/centered_abs_mean": 0.4456787109375, "eval_signal/accuracy_reward/group_std_mean": 0.47858355939388275, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22283935546875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22283935546875, "eval_signal/advantage_abs_mean": 0.20646706968545914, "eval_signal/advantage_pre_scale_abs_mean": 0.20646706968545914, "eval_signal/advantage_pre_scale_std": 0.23231954872608185, "eval_signal/advantage_std": 0.23231954872608185, "eval_signal/brier_reward/centered_abs_mean": 0.22418075799942017, "eval_signal/brier_reward/group_std_mean": 0.2734896242618561, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02802259474992752, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02802259474992752, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07463713735342026, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09528587758541107, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009329642169177532, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009329642169177532, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004753857152536511, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007068477105349302, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.50940377858933e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.50940377858933e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.31350038945674896, "eval_signal/frontier_coverage_1/group_std_mean": 0.39658913016319275, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.31350038945674896, "eval_signal/frontier_coverage_10/group_std_mean": 0.39658913016319275, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.31350038945674896, "eval_signal/frontier_coverage_15/group_std_mean": 0.39658913016319275, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.31350038945674896, "eval_signal/frontier_coverage_20/group_std_mean": 0.39658913016319275, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.31350038945674896, "eval_signal/frontier_coverage_25/group_std_mean": 0.39658913016319275, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.31350038945674896, "eval_signal/frontier_coverage_5/group_std_mean": 0.39658913016319275, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005611656466498971, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.05817369371652603, "eval_signal/frontier_ece_reward/group_std_mean": 0.08309631422162056, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007271711714565754, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007271711714565754, "eval_steps_per_second": 0.104, "step": 50 }, { "epoch": 0.16, "step": 50, "train_probe_calibration/aurc": 0.2664804729817548, "train_probe_calibration/batch_distribution_entropy": 0.9324186885257131, "train_probe_calibration/buffer_distribution_entropy": 0.9309467058219747, "train_probe_calibration/confidence_entropy": 0.441220735421333, "train_probe_calibration/coverage@0%": 0.03125, "train_probe_calibration/coverage@1%": 0.03125, "train_probe_calibration/coverage@10%": 0.03125, "train_probe_calibration/coverage@15%": 0.46875, "train_probe_calibration/coverage@20%": 0.484375, "train_probe_calibration/coverage@25%": 0.578125, "train_probe_calibration/coverage@30%": 0.640625, "train_probe_calibration/coverage@5%": 0.03125, "train_probe_calibration/ece": 0.17640625, "train_probe_calibration/mean_confidence": 0.57484375, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 366.0, "train_probe_completions/max_terminated_length": 366.0, "train_probe_completions/mean_length": 165.32955932617188, "train_probe_completions/mean_terminated_length": 165.32955932617188, "train_probe_completions/min_length": 66.0, "train_probe_completions/min_terminated_length": 66.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 167205538.0, "train_probe_reward": 1.0041134357452393, "train_probe_reward_std": 0.2359241172671318, "train_probe_rewards/accuracy_reward": 0.57421875, "train_probe_rewards/brier_reward": 0.7899730503559113, "train_probe_rewards/confidence_uniqueness_reward": 0.839599609375, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0027748874854296446, "train_probe_rewards/frontier_coverage_1": 0.08121992275118828, "train_probe_rewards/frontier_coverage_10": 0.08121992275118828, "train_probe_rewards/frontier_coverage_15": 0.08121992275118828, "train_probe_rewards/frontier_coverage_20": 0.08121992275118828, "train_probe_rewards/frontier_coverage_25": 0.08121992275118828, "train_probe_rewards/frontier_coverage_5": 0.08121992275118828, "train_probe_rewards/frontier_ece_reward": 0.03707304783165455, "train_probe_runtime": 9.186, "train_probe_samples_per_second": 54.43, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.48193359375, "train_probe_signal/accuracy_reward/group_std_mean": 0.49865010380744934, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.240966796875, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.240966796875, "train_probe_signal/advantage_abs_mean": 0.2185721918940544, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2185721918940544, "train_probe_signal/advantage_pre_scale_std": 0.23293063789606094, "train_probe_signal/advantage_std": 0.23293063789606094, "train_probe_signal/brier_reward/centered_abs_mean": 0.20220646262168884, "train_probe_signal/brier_reward/group_std_mean": 0.24830978363752365, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025275807827711105, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.025275807827711105, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.076904296875, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.09100573509931564, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009613037109375, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009613037109375, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.004000097163952887, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.00605845358222723, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.160173117881641e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.160173117881641e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30071887373924255, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.4058589041233063, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30071887373924255, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.4058589041233063, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.30071887373924255, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.4058589041233063, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.30071887373924255, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.4058589041233063, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.30071887373924255, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.4058589041233063, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30071887373924255, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.4058589041233063, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0053828677628189325, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.06303473375737667, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.08334130793809891, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007879341719672084, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007879341719672084, "train_probe_steps_per_second": 0.218 }, { "calibration/aurc": 0.2950848289576411, "calibration/batch_distribution_entropy": 0.930281332331097, "calibration/buffer_distribution_entropy": 0.9341737996692416, "calibration/confidence_entropy": 0.4755722991194964, "calibration/coverage@0%": 0.01484375, "calibration/coverage@1%": 0.01484375, "calibration/coverage@10%": 0.16171875, "calibration/coverage@15%": 0.2046875, "calibration/coverage@20%": 0.2859375, "calibration/coverage@25%": 0.3765625, "calibration/coverage@30%": 0.5828125, "calibration/coverage@5%": 0.10234375, "calibration/ece": 0.1771126289368873, "calibration/mean_confidence": 0.5750459494944853, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1084.8, "completions/max_terminated_length": 497.4, "completions/mean_length": 168.68369140625, "completions/mean_terminated_length": 168.2832794189453, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.176, "grad_norm": 0.003821933874860406, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 184169979.0, "reward": 0.9811482787132263, "reward_std": 0.1093181312084198, "rewards/accuracy_reward": 0.52294921875, "rewards/brier_reward": 0.7728787422180176, "rewards/confidence_uniqueness_reward": 0.8689801812171936, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.003453007619827986, "rewards/frontier_coverage_1": 0.10514852032065392, "rewards/frontier_coverage_10": 0.10514852032065392, "rewards/frontier_coverage_15": 0.10514852032065392, "rewards/frontier_coverage_20": 0.10514852032065392, "rewards/frontier_coverage_25": 0.10514852032065392, "rewards/frontier_coverage_5": 0.10514852032065392, "rewards/frontier_ece_reward": 0.02763434946537018, "signal/accuracy_reward/centered_abs_mean": 0.128729248046875, "signal/accuracy_reward/group_std_mean": 0.1696704939007759, "signal/accuracy_reward/group_zero_std_frac": 0.5125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0643646240234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0643646240234375, "signal/advantage_abs_mean": 0.0838969498872757, "signal/advantage_pre_scale_abs_mean": 0.0838969498872757, "signal/advantage_pre_scale_std": 0.12963834255933762, "signal/advantage_std": 0.12963834255933762, "signal/brier_reward/centered_abs_mean": 0.16208215355873107, "signal/brier_reward/group_std_mean": 0.20663413107395173, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020260269194841383, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020260269194841383, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07365219593048096, "signal/confidence_uniqueness_reward/group_std_mean": 0.08795170336961747, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00920652449131012, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00920652449131012, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002859164075925946, "signal/frontier_aurc_reward/group_std_mean": 0.004245653934776783, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.117903638165444e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.117903638165444e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1915457785129547, "signal/frontier_coverage_1/group_std_mean": 0.24914441704750062, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_10/centered_abs_mean": 0.1915457785129547, "signal/frontier_coverage_10/group_std_mean": 0.24914441704750062, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_15/centered_abs_mean": 0.1915457785129547, "signal/frontier_coverage_15/group_std_mean": 0.24914441704750062, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_20/centered_abs_mean": 0.1915457785129547, "signal/frontier_coverage_20/group_std_mean": 0.24914441704750062, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_25/centered_abs_mean": 0.1915457785129547, "signal/frontier_coverage_25/group_std_mean": 0.24914441704750062, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_5/centered_abs_mean": 0.1915457785129547, "signal/frontier_coverage_5/group_std_mean": 0.24914441704750062, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003428669273853302, "signal/frontier_ece_reward/centered_abs_mean": 0.04376091659069061, "signal/frontier_ece_reward/group_std_mean": 0.05602394491434097, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005470114573836326, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005470114573836326, "step": 55 }, { "calibration/aurc": 0.331854588779599, "calibration/batch_distribution_entropy": 0.8745741485620421, "calibration/buffer_distribution_entropy": 0.9365178507635724, "calibration/confidence_entropy": 0.4012815190096622, "calibration/coverage@0%": 0.003125, "calibration/coverage@1%": 0.003125, "calibration/coverage@10%": 0.05234375, "calibration/coverage@15%": 0.140625, "calibration/coverage@20%": 0.32421875, "calibration/coverage@25%": 0.3828125, "calibration/coverage@30%": 0.5078125, "calibration/coverage@5%": 0.003125, "calibration/ece": 0.173107109375, "calibration/mean_confidence": 0.6393742968749999, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 829.0, "completions/max_terminated_length": 608.8, "completions/mean_length": 167.47001953125, "completions/mean_terminated_length": 167.33668518066406, "completions/min_length": 78.8, "completions/min_terminated_length": 78.8, "epoch": 0.192, "grad_norm": 0.004035938531160355, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 200699688.0, "reward": 0.9879758596420288, "reward_std": 0.11625557094812393, "rewards/accuracy_reward": 0.53916015625, "rewards/brier_reward": 0.7739283800125122, "rewards/confidence_uniqueness_reward": 0.8580935597419739, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0036836853716522453, "rewards/frontier_coverage_1": 0.10152835100889206, "rewards/frontier_coverage_10": 0.10152835100889206, "rewards/frontier_coverage_15": 0.10152835100889206, "rewards/frontier_coverage_20": 0.10152835100889206, "rewards/frontier_coverage_25": 0.10152835100889206, "rewards/frontier_coverage_5": 0.10152835100889206, "rewards/frontier_ece_reward": 0.030000920966267587, "signal/accuracy_reward/centered_abs_mean": 0.136065673828125, "signal/accuracy_reward/group_std_mean": 0.179530268907547, "signal/accuracy_reward/group_zero_std_frac": 0.484375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0680328369140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0680328369140625, "signal/advantage_abs_mean": 0.08931153416633605, "signal/advantage_pre_scale_abs_mean": 0.08931153416633605, "signal/advantage_pre_scale_std": 0.13905880898237227, "signal/advantage_std": 0.13905880898237227, "signal/brier_reward/centered_abs_mean": 0.17017331421375276, "signal/brier_reward/group_std_mean": 0.21759623885154725, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021271664276719095, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021271664276719095, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09090490639209747, "signal/confidence_uniqueness_reward/group_std_mean": 0.10824680477380752, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.011363113299012184, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011363113299012184, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036389449145644904, "signal/frontier_aurc_reward/group_std_mean": 0.0054845036007463936, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.513711123261601e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.513711123261601e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17842654287815093, "signal/frontier_coverage_1/group_std_mean": 0.23917962312698365, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_10/centered_abs_mean": 0.17842654287815093, "signal/frontier_coverage_10/group_std_mean": 0.23917962312698365, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_15/centered_abs_mean": 0.17842654287815093, "signal/frontier_coverage_15/group_std_mean": 0.23917962312698365, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_20/centered_abs_mean": 0.17842654287815093, "signal/frontier_coverage_20/group_std_mean": 0.23917962312698365, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_25/centered_abs_mean": 0.17842654287815093, "signal/frontier_coverage_25/group_std_mean": 0.23917962312698365, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_5/centered_abs_mean": 0.17842654287815093, "signal/frontier_coverage_5/group_std_mean": 0.23917962312698365, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003193834982812405, "signal/frontier_ece_reward/centered_abs_mean": 0.04488262310624123, "signal/frontier_ece_reward/group_std_mean": 0.056452129036188126, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0056103278882801534, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0056103278882801534, "step": 60 }, { "calibration/aurc": 0.32373530576954757, "calibration/batch_distribution_entropy": 0.8591898290330275, "calibration/buffer_distribution_entropy": 0.9347868571410448, "calibration/confidence_entropy": 0.3849134860388972, "calibration/coverage@0%": 0.00859375, "calibration/coverage@1%": 0.00859375, "calibration/coverage@10%": 0.09375, "calibration/coverage@15%": 0.24765625, "calibration/coverage@20%": 0.36015625, "calibration/coverage@25%": 0.546875, "calibration/coverage@30%": 0.65546875, "calibration/coverage@5%": 0.00859375, "calibration/ece": 0.18438471015114383, "calibration/mean_confidence": 0.6311039438521242, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 873.6, "completions/max_terminated_length": 432.2, "completions/mean_length": 166.7802734375, "completions/mean_terminated_length": 166.3782531738281, "completions/min_length": 72.8, "completions/min_terminated_length": 72.8, "epoch": 0.208, "grad_norm": 0.0025081464555114508, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 217439742.0, "reward": 1.0152548551559448, "reward_std": 0.11149686425924302, "rewards/accuracy_reward": 0.58408203125, "rewards/brier_reward": 0.7946485996246337, "rewards/confidence_uniqueness_reward": 0.8775890946388245, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0030152024235576393, "rewards/frontier_coverage_1": 0.09387013614177704, "rewards/frontier_coverage_10": 0.09387013614177704, "rewards/frontier_coverage_15": 0.09387013614177704, "rewards/frontier_coverage_20": 0.09387013614177704, "rewards/frontier_coverage_25": 0.09387013614177704, "rewards/frontier_coverage_5": 0.09387013614177704, "rewards/frontier_ece_reward": 0.03520463481545448, "signal/accuracy_reward/centered_abs_mean": 0.121368408203125, "signal/accuracy_reward/group_std_mean": 0.1650165855884552, "signal/accuracy_reward/group_zero_std_frac": 0.515625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0606842041015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0606842041015625, "signal/advantage_abs_mean": 0.08337271958589554, "signal/advantage_pre_scale_abs_mean": 0.08337271958589554, "signal/advantage_pre_scale_std": 0.13434576690196992, "signal/advantage_std": 0.13434576690196992, "signal/brier_reward/centered_abs_mean": 0.16763521134853362, "signal/brier_reward/group_std_mean": 0.21714569628238678, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020954401418566703, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020954401418566703, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08260471224784852, "signal/confidence_uniqueness_reward/group_std_mean": 0.0987936407327652, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010325589030981065, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010325589030981065, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003630819218233228, "signal/frontier_aurc_reward/group_std_mean": 0.005773447826504708, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.499166338471695e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.499166338471695e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17451754212379456, "signal/frontier_coverage_1/group_std_mean": 0.23370930552482605, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_10/centered_abs_mean": 0.17451754212379456, "signal/frontier_coverage_10/group_std_mean": 0.23370930552482605, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_15/centered_abs_mean": 0.17451754212379456, "signal/frontier_coverage_15/group_std_mean": 0.23370930552482605, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_20/centered_abs_mean": 0.17451754212379456, "signal/frontier_coverage_20/group_std_mean": 0.23370930552482605, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_25/centered_abs_mean": 0.17451754212379456, "signal/frontier_coverage_25/group_std_mean": 0.23370930552482605, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_5/centered_abs_mean": 0.17451754212379456, "signal/frontier_coverage_5/group_std_mean": 0.23370930552482605, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031238638795912264, "signal/frontier_ece_reward/centered_abs_mean": 0.04130900949239731, "signal/frontier_ece_reward/group_std_mean": 0.05173059701919556, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005163626186549664, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005163626186549664, "step": 65 }, { "calibration/aurc": 0.3356664044008323, "calibration/batch_distribution_entropy": 0.9384537311646113, "calibration/buffer_distribution_entropy": 0.9376239395466122, "calibration/confidence_entropy": 0.40272087778388677, "calibration/coverage@0%": 0.03441176470588235, "calibration/coverage@1%": 0.03441176470588235, "calibration/coverage@10%": 0.1807077205882353, "calibration/coverage@15%": 0.2613296568627451, "calibration/coverage@20%": 0.310640318627451, "calibration/coverage@25%": 0.3513939950980392, "calibration/coverage@30%": 0.5038296568627451, "calibration/coverage@5%": 0.0696966911764706, "calibration/ece": 0.1596415861961429, "calibration/mean_confidence": 0.5144321171091331, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 1094.8, "completions/max_terminated_length": 759.4, "completions/mean_length": 166.6509765625, "completions/mean_terminated_length": 165.4495086669922, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.224, "grad_norm": 0.0021792047191411257, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 234299432.0, "reward": 1.0031249046325683, "reward_std": 0.10735798627138138, "rewards/accuracy_reward": 0.55029296875, "rewards/brier_reward": 0.7902113795280457, "rewards/confidence_uniqueness_reward": 0.8990541458129883, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.003141326270997524, "rewards/frontier_coverage_1": 0.12729544788599015, "rewards/frontier_coverage_10": 0.12729544788599015, "rewards/frontier_coverage_15": 0.12729544788599015, "rewards/frontier_coverage_20": 0.12729544788599015, "rewards/frontier_coverage_25": 0.12729544788599015, "rewards/frontier_coverage_5": 0.12729544788599015, "rewards/frontier_ece_reward": 0.029154983535408973, "signal/accuracy_reward/centered_abs_mean": 0.120965576171875, "signal/accuracy_reward/group_std_mean": 0.16113831400871276, "signal/accuracy_reward/group_zero_std_frac": 0.53125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0604827880859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0604827880859375, "signal/advantage_abs_mean": 0.08187931925058364, "signal/advantage_pre_scale_abs_mean": 0.08187931925058364, "signal/advantage_pre_scale_std": 0.13045653700828552, "signal/advantage_std": 0.13045653700828552, "signal/brier_reward/centered_abs_mean": 0.17681266367435455, "signal/brier_reward/group_std_mean": 0.2258577436208725, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02210158295929432, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02210158295929432, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0674636647105217, "signal/confidence_uniqueness_reward/group_std_mean": 0.08297923505306244, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008432958088815213, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008432958088815213, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844516694546, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036583705339580776, "signal/frontier_aurc_reward/group_std_mean": 0.0056047579273581505, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.548483070218935e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.548483070218935e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19972024559974672, "signal/frontier_coverage_1/group_std_mean": 0.26246256828308107, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_10/centered_abs_mean": 0.19972024559974672, "signal/frontier_coverage_10/group_std_mean": 0.26246256828308107, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_15/centered_abs_mean": 0.19972024559974672, "signal/frontier_coverage_15/group_std_mean": 0.26246256828308107, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_20/centered_abs_mean": 0.19972024559974672, "signal/frontier_coverage_20/group_std_mean": 0.26246256828308107, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_25/centered_abs_mean": 0.19972024559974672, "signal/frontier_coverage_25/group_std_mean": 0.26246256828308107, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_5/centered_abs_mean": 0.19972024559974672, "signal/frontier_coverage_5/group_std_mean": 0.26246256828308107, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00357499229721725, "signal/frontier_ece_reward/centered_abs_mean": 0.03642488420009613, "signal/frontier_ece_reward/group_std_mean": 0.04521550685167312, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0045531105250120165, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0045531105250120165, "step": 70 }, { "calibration/aurc": 0.3522414944051539, "calibration/batch_distribution_entropy": 0.8803572798405815, "calibration/buffer_distribution_entropy": 0.9410783570721908, "calibration/confidence_entropy": 0.36782506176165003, "calibration/coverage@0%": 0.01875, "calibration/coverage@1%": 0.01875, "calibration/coverage@10%": 0.09140625, "calibration/coverage@15%": 0.128125, "calibration/coverage@20%": 0.16328125, "calibration/coverage@25%": 0.36796875, "calibration/coverage@30%": 0.496875, "calibration/coverage@5%": 0.0328125, "calibration/ece": 0.1927504647077757, "calibration/mean_confidence": 0.5310109334577756, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1190.8, "completions/max_terminated_length": 514.6, "completions/mean_length": 167.90107421875, "completions/mean_terminated_length": 167.3661346435547, "completions/min_length": 70.2, "completions/min_terminated_length": 70.2, "epoch": 0.24, "grad_norm": 0.0024263551458716393, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 251270419.0, "reward": 1.0291972041130066, "reward_std": 0.10552183389663697, "rewards/accuracy_reward": 0.60859375, "rewards/brier_reward": 0.7870316863059997, "rewards/confidence_uniqueness_reward": 0.91382737159729, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0025115890428423883, "rewards/frontier_coverage_1": 0.08323012106120586, "rewards/frontier_coverage_10": 0.08323012106120586, "rewards/frontier_coverage_15": 0.08323012106120586, "rewards/frontier_coverage_20": 0.08323012106120586, "rewards/frontier_coverage_25": 0.08323012106120586, "rewards/frontier_coverage_5": 0.08323012106120586, "rewards/frontier_ece_reward": 0.02914494350552559, "signal/accuracy_reward/centered_abs_mean": 0.1296142578125, "signal/accuracy_reward/group_std_mean": 0.16963129937648774, "signal/accuracy_reward/group_zero_std_frac": 0.521875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06480712890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06480712890625, "signal/advantage_abs_mean": 0.08079658448696136, "signal/advantage_pre_scale_abs_mean": 0.08079658448696136, "signal/advantage_pre_scale_std": 0.12934576272964476, "signal/advantage_std": 0.12934576272964476, "signal/brier_reward/centered_abs_mean": 0.17554612457752228, "signal/brier_reward/group_std_mean": 0.2262921988964081, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021943265572190285, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021943265572190285, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05461069941520691, "signal/confidence_uniqueness_reward/group_std_mean": 0.06644331142306328, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006826337426900864, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006826337426900864, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.003250430291518569, "signal/frontier_aurc_reward/group_std_mean": 0.005101799964904785, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.818270146846771e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.818270146846771e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2036992698907852, "signal/frontier_coverage_1/group_std_mean": 0.2712432205677032, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_10/centered_abs_mean": 0.2036992698907852, "signal/frontier_coverage_10/group_std_mean": 0.2712432205677032, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_15/centered_abs_mean": 0.2036992698907852, "signal/frontier_coverage_15/group_std_mean": 0.2712432205677032, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_20/centered_abs_mean": 0.2036992698907852, "signal/frontier_coverage_20/group_std_mean": 0.2712432205677032, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_25/centered_abs_mean": 0.2036992698907852, "signal/frontier_coverage_25/group_std_mean": 0.2712432205677032, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_5/centered_abs_mean": 0.2036992698907852, "signal/frontier_coverage_5/group_std_mean": 0.2712432205677032, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003646216681227088, "signal/frontier_ece_reward/centered_abs_mean": 0.03291768655180931, "signal/frontier_ece_reward/group_std_mean": 0.04102036878466606, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041147108189761635, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041147108189761635, "step": 75 }, { "calibration/aurc": 0.21512356164155774, "calibration/batch_distribution_entropy": 0.8827675644974178, "calibration/buffer_distribution_entropy": 0.9422732160404657, "calibration/confidence_entropy": 0.36174526720048983, "calibration/coverage@0%": 0.06015625, "calibration/coverage@1%": 0.06015625, "calibration/coverage@10%": 0.2484375, "calibration/coverage@15%": 0.4230974264705882, "calibration/coverage@20%": 0.5403370098039215, "calibration/coverage@25%": 0.6317861519607844, "calibration/coverage@30%": 0.7466666666666667, "calibration/coverage@5%": 0.1515625, "calibration/ece": 0.1830301858879185, "calibration/mean_confidence": 0.5492079764272219, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 892.6, "completions/max_terminated_length": 458.0, "completions/mean_length": 164.4017578125, "completions/mean_terminated_length": 163.9999572753906, "completions/min_length": 75.8, "completions/min_terminated_length": 75.8, "epoch": 0.256, "grad_norm": 0.0027514868415892124, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 268008709.0, "reward": 1.0226893305778504, "reward_std": 0.09834913462400437, "rewards/accuracy_reward": 0.58173828125, "rewards/brier_reward": 0.8035358548164367, "rewards/confidence_uniqueness_reward": 0.9133859038352966, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0023928165435791017, "rewards/frontier_coverage_1": 0.12931015118956565, "rewards/frontier_coverage_10": 0.12931015118956565, "rewards/frontier_coverage_15": 0.12931015118956565, "rewards/frontier_coverage_20": 0.12931015118956565, "rewards/frontier_coverage_25": 0.12931015118956565, "rewards/frontier_coverage_5": 0.12931015118956565, "rewards/frontier_ece_reward": 0.028441504389047623, "signal/accuracy_reward/centered_abs_mean": 0.123785400390625, "signal/accuracy_reward/group_std_mean": 0.1612432286143303, "signal/accuracy_reward/group_zero_std_frac": 0.546875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0618927001953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0618927001953125, "signal/advantage_abs_mean": 0.07478999271988869, "signal/advantage_pre_scale_abs_mean": 0.07478999271988869, "signal/advantage_pre_scale_std": 0.12347659170627594, "signal/advantage_std": 0.12347659170627594, "signal/brier_reward/centered_abs_mean": 0.16572422683238983, "signal/brier_reward/group_std_mean": 0.2140843689441681, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020715528354048728, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020715528354048728, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05513819307088852, "signal/confidence_uniqueness_reward/group_std_mean": 0.06670184880495071, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006892274133861065, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006892274133861065, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003011533757671714, "signal/frontier_aurc_reward/group_std_mean": 0.0047927751205861565, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.39064516487997e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.39064516487997e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19958887100219727, "signal/frontier_coverage_1/group_std_mean": 0.2646804749965668, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_10/centered_abs_mean": 0.19958887100219727, "signal/frontier_coverage_10/group_std_mean": 0.2646804749965668, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_15/centered_abs_mean": 0.19958887100219727, "signal/frontier_coverage_15/group_std_mean": 0.2646804749965668, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_20/centered_abs_mean": 0.19958887100219727, "signal/frontier_coverage_20/group_std_mean": 0.2646804749965668, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_25/centered_abs_mean": 0.19958887100219727, "signal/frontier_coverage_25/group_std_mean": 0.2646804749965668, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_5/centered_abs_mean": 0.19958887100219727, "signal/frontier_coverage_5/group_std_mean": 0.2646804749965668, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035726406611502172, "signal/frontier_ece_reward/centered_abs_mean": 0.02867573909461498, "signal/frontier_ece_reward/group_std_mean": 0.03616860210895538, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035844673868268727, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035844673868268727, "step": 80 }, { "calibration/aurc": 0.21649586779760063, "calibration/batch_distribution_entropy": 0.8773805631285727, "calibration/buffer_distribution_entropy": 0.9423614066226536, "calibration/confidence_entropy": 0.3884055467369814, "calibration/coverage@0%": 0.0109375, "calibration/coverage@1%": 0.0109375, "calibration/coverage@10%": 0.18828125, "calibration/coverage@15%": 0.38828125, "calibration/coverage@20%": 0.47734375, "calibration/coverage@25%": 0.74609375, "calibration/coverage@30%": 0.8078125, "calibration/coverage@5%": 0.0921875, "calibration/ece": 0.12636070145032918, "calibration/mean_confidence": 0.6323173064632307, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1088.2, "completions/max_terminated_length": 434.4, "completions/mean_length": 169.712890625, "completions/mean_terminated_length": 169.04539794921874, "completions/min_length": 74.4, "completions/min_terminated_length": 74.4, "epoch": 0.272, "grad_norm": 0.0028915083967149258, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 284712265.0, "reward": 1.0215874552726745, "reward_std": 0.10492411553859711, "rewards/accuracy_reward": 0.58447265625, "rewards/brier_reward": 0.7885427713394165, "rewards/confidence_uniqueness_reward": 0.9305692434310913, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0025600562803447247, "rewards/frontier_coverage_1": 0.11024373397231102, "rewards/frontier_coverage_10": 0.11024373397231102, "rewards/frontier_coverage_15": 0.11024373397231102, "rewards/frontier_coverage_20": 0.11024373397231102, "rewards/frontier_coverage_25": 0.11024373397231102, "rewards/frontier_coverage_5": 0.11024373397231102, "rewards/frontier_ece_reward": 0.024467223882675172, "signal/accuracy_reward/centered_abs_mean": 0.121722412109375, "signal/accuracy_reward/group_std_mean": 0.16252617239952089, "signal/accuracy_reward/group_zero_std_frac": 0.5375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0608612060546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0608612060546875, "signal/advantage_abs_mean": 0.07876608818769455, "signal/advantage_pre_scale_abs_mean": 0.07876608818769455, "signal/advantage_pre_scale_std": 0.12760126292705537, "signal/advantage_std": 0.12760126292705537, "signal/brier_reward/centered_abs_mean": 0.1705150604248047, "signal/brier_reward/group_std_mean": 0.22130897045135497, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021314382553100586, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021314382553100586, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04144674874842167, "signal/confidence_uniqueness_reward/group_std_mean": 0.05355666503310204, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005180843593552709, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005180843593552709, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002936669299378991, "signal/frontier_aurc_reward/group_std_mean": 0.0046648337505757805, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.256637814454734e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.256637814454734e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19893713295459747, "signal/frontier_coverage_1/group_std_mean": 0.2645448505878448, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_10/centered_abs_mean": 0.19893713295459747, "signal/frontier_coverage_10/group_std_mean": 0.2645448505878448, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_15/centered_abs_mean": 0.19893713295459747, "signal/frontier_coverage_15/group_std_mean": 0.2645448505878448, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_20/centered_abs_mean": 0.19893713295459747, "signal/frontier_coverage_20/group_std_mean": 0.2645448505878448, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_25/centered_abs_mean": 0.19893713295459747, "signal/frontier_coverage_25/group_std_mean": 0.2645448505878448, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_5/centered_abs_mean": 0.19893713295459747, "signal/frontier_coverage_5/group_std_mean": 0.2645448505878448, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035609744023531674, "signal/frontier_ece_reward/centered_abs_mean": 0.027610136568546294, "signal/frontier_ece_reward/group_std_mean": 0.034552381932735445, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0034512670710682867, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0034512670710682867, "step": 85 }, { "calibration/aurc": 0.24803686043807643, "calibration/batch_distribution_entropy": 0.8444372092396852, "calibration/buffer_distribution_entropy": 0.9429605942110868, "calibration/confidence_entropy": 0.36471631140917615, "calibration/coverage@0%": 0.0421875, "calibration/coverage@1%": 0.0421875, "calibration/coverage@10%": 0.2828125, "calibration/coverage@15%": 0.32890625, "calibration/coverage@20%": 0.4796875, "calibration/coverage@25%": 0.546875, "calibration/coverage@30%": 0.6203125, "calibration/coverage@5%": 0.196875, "calibration/ece": 0.12333755567361956, "calibration/mean_confidence": 0.6039962674593244, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1108.2, "completions/max_terminated_length": 514.0, "completions/mean_length": 166.53251953125, "completions/mean_terminated_length": 166.13165893554688, "completions/min_length": 74.6, "completions/min_terminated_length": 74.6, "epoch": 0.288, "grad_norm": 0.0030995451379567385, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 301375734.0, "reward": 1.0154645323753357, "reward_std": 0.10347330272197723, "rewards/accuracy_reward": 0.5708984375, "rewards/brier_reward": 0.7860757231712341, "rewards/confidence_uniqueness_reward": 0.9404593467712402, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002836257731541991, "rewards/frontier_coverage_1": 0.1086883544921875, "rewards/frontier_coverage_10": 0.1086883544921875, "rewards/frontier_coverage_15": 0.1086883544921875, "rewards/frontier_coverage_20": 0.1086883544921875, "rewards/frontier_coverage_25": 0.1086883544921875, "rewards/frontier_coverage_5": 0.1086883544921875, "rewards/frontier_ece_reward": 0.022170854546129704, "signal/accuracy_reward/centered_abs_mean": 0.1334228515625, "signal/accuracy_reward/group_std_mean": 0.17499251067638397, "signal/accuracy_reward/group_zero_std_frac": 0.503125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06671142578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06671142578125, "signal/advantage_abs_mean": 0.07898036390542984, "signal/advantage_pre_scale_abs_mean": 0.07898036390542984, "signal/advantage_pre_scale_std": 0.12728380411863327, "signal/advantage_std": 0.12728380411863327, "signal/brier_reward/centered_abs_mean": 0.16704794466495515, "signal/brier_reward/group_std_mean": 0.21451664268970488, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020880993083119394, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020880993083119394, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0323214516043663, "signal/confidence_uniqueness_reward/group_std_mean": 0.04215872809290886, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004040181450545788, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004040181450545788, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028704125434160233, "signal/frontier_aurc_reward/group_std_mean": 0.004553637374192477, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1380382501520216e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1380382501520216e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19622489511966706, "signal/frontier_coverage_1/group_std_mean": 0.2572601854801178, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_10/centered_abs_mean": 0.19622489511966706, "signal/frontier_coverage_10/group_std_mean": 0.2572601854801178, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_15/centered_abs_mean": 0.19622489511966706, "signal/frontier_coverage_15/group_std_mean": 0.2572601854801178, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_20/centered_abs_mean": 0.19622489511966706, "signal/frontier_coverage_20/group_std_mean": 0.2572601854801178, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_25/centered_abs_mean": 0.19622489511966706, "signal/frontier_coverage_25/group_std_mean": 0.2572601854801178, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_5/centered_abs_mean": 0.19622489511966706, "signal/frontier_coverage_5/group_std_mean": 0.2572601854801178, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003512425487861037, "signal/frontier_ece_reward/centered_abs_mean": 0.026541993021965027, "signal/frontier_ece_reward/group_std_mean": 0.032760906219482425, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0033177491277456284, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0033177491277456284, "step": 90 }, { "calibration/aurc": 0.2445192555610003, "calibration/batch_distribution_entropy": 0.8722010859854628, "calibration/buffer_distribution_entropy": 0.9413355201822619, "calibration/confidence_entropy": 0.3738584132253522, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.2203125, "calibration/coverage@15%": 0.3078125, "calibration/coverage@20%": 0.49765625, "calibration/coverage@25%": 0.62578125, "calibration/coverage@30%": 0.70390625, "calibration/coverage@5%": 0.05859375, "calibration/ece": 0.12586600898720163, "calibration/mean_confidence": 0.5935988981383198, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 929.8, "completions/max_terminated_length": 490.4, "completions/mean_length": 169.30595703125, "completions/mean_terminated_length": 169.03911743164062, "completions/min_length": 80.8, "completions/min_terminated_length": 80.8, "epoch": 0.304, "grad_norm": 0.0025018032174557447, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 318039379.0, "reward": 1.0334564208984376, "reward_std": 0.09357217103242874, "rewards/accuracy_reward": 0.60791015625, "rewards/brier_reward": 0.7965248703956604, "rewards/confidence_uniqueness_reward": 0.9396258115768432, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0024371820967644454, "rewards/frontier_coverage_1": 0.09061380345374345, "rewards/frontier_coverage_10": 0.09061380345374345, "rewards/frontier_coverage_15": 0.09061380345374345, "rewards/frontier_coverage_20": 0.09061380345374345, "rewards/frontier_coverage_25": 0.09061380345374345, "rewards/frontier_coverage_5": 0.09061380345374345, "rewards/frontier_ece_reward": 0.023134828731417655, "signal/accuracy_reward/centered_abs_mean": 0.111419677734375, "signal/accuracy_reward/group_std_mean": 0.14964892268180846, "signal/accuracy_reward/group_zero_std_frac": 0.5625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0557098388671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0557098388671875, "signal/advantage_abs_mean": 0.07044639587402343, "signal/advantage_pre_scale_abs_mean": 0.07044639587402343, "signal/advantage_pre_scale_std": 0.11818494796752929, "signal/advantage_std": 0.11818494796752929, "signal/brier_reward/centered_abs_mean": 0.15556592047214507, "signal/brier_reward/group_std_mean": 0.20124119520187378, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019445740059018134, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019445740059018134, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031480921804904936, "signal/confidence_uniqueness_reward/group_std_mean": 0.04011792093515396, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003935115225613117, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003935115225613117, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002605660632252693, "signal/frontier_aurc_reward/group_std_mean": 0.004119851719588041, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.664132356992923e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.664132356992923e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17928916215896606, "signal/frontier_coverage_1/group_std_mean": 0.23793997764587402, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_10/centered_abs_mean": 0.17928916215896606, "signal/frontier_coverage_10/group_std_mean": 0.23793997764587402, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_15/centered_abs_mean": 0.17928916215896606, "signal/frontier_coverage_15/group_std_mean": 0.23793997764587402, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_20/centered_abs_mean": 0.17928916215896606, "signal/frontier_coverage_20/group_std_mean": 0.23793997764587402, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_25/centered_abs_mean": 0.17928916215896606, "signal/frontier_coverage_25/group_std_mean": 0.23793997764587402, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_5/centered_abs_mean": 0.17928916215896606, "signal/frontier_coverage_5/group_std_mean": 0.23793997764587402, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032092759851366282, "signal/frontier_ece_reward/centered_abs_mean": 0.02426176182925701, "signal/frontier_ece_reward/group_std_mean": 0.030135614797472954, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0030327202286571263, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0030327202286571263, "step": 95 }, { "calibration/aurc": 0.1958580706055439, "calibration/batch_distribution_entropy": 0.8434134033250091, "calibration/buffer_distribution_entropy": 0.9401943668455596, "calibration/confidence_entropy": 0.36734551027565937, "calibration/coverage@0%": 0.032889093137254896, "calibration/coverage@1%": 0.032889093137254896, "calibration/coverage@10%": 0.2699050245098039, "calibration/coverage@15%": 0.4144822303921568, "calibration/coverage@20%": 0.6098314950980392, "calibration/coverage@25%": 0.7403707107843138, "calibration/coverage@30%": 0.8201439950980391, "calibration/coverage@5%": 0.15497242647058823, "calibration/ece": 0.1445590710969227, "calibration/mean_confidence": 0.6401742367558774, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1306.8, "completions/max_terminated_length": 501.4, "completions/mean_length": 168.332421875, "completions/mean_terminated_length": 167.664208984375, "completions/min_length": 80.2, "completions/min_terminated_length": 80.2, "epoch": 0.32, "grad_norm": 0.0026968803722411394, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 334851807.0, "reward": 1.0260323882102966, "reward_std": 0.08396224528551102, "rewards/accuracy_reward": 0.585546875, "rewards/brier_reward": 0.8071523427963256, "rewards/confidence_uniqueness_reward": 0.9357501029968261, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.002748763840645552, "rewards/frontier_coverage_1": 0.11854975577443838, "rewards/frontier_coverage_10": 0.11854975577443838, "rewards/frontier_coverage_15": 0.11854975577443838, "rewards/frontier_coverage_20": 0.11854975577443838, "rewards/frontier_coverage_25": 0.11854975577443838, "rewards/frontier_coverage_5": 0.11854975577443838, "rewards/frontier_ece_reward": 0.02365802228450775, "signal/accuracy_reward/centered_abs_mean": 0.08590087890625, "signal/accuracy_reward/group_std_mean": 0.1201841339468956, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042950439453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042950439453125, "signal/advantage_abs_mean": 0.06151105165481567, "signal/advantage_pre_scale_abs_mean": 0.06151105165481567, "signal/advantage_pre_scale_std": 0.10997560620307922, "signal/advantage_std": 0.10997560620307922, "signal/brier_reward/centered_abs_mean": 0.14419465661048889, "signal/brier_reward/group_std_mean": 0.18665907382965088, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01802433207631111, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01802433207631111, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03390970081090927, "signal/confidence_uniqueness_reward/group_std_mean": 0.04388536140322685, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004238712601363659, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004238712601363659, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002838291879743338, "signal/frontier_aurc_reward/group_std_mean": 0.004497240483760834, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.080542250652797e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.080542250652797e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1544642448425293, "signal/frontier_coverage_1/group_std_mean": 0.20439959168434144, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_10/centered_abs_mean": 0.1544642448425293, "signal/frontier_coverage_10/group_std_mean": 0.20439959168434144, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_15/centered_abs_mean": 0.1544642448425293, "signal/frontier_coverage_15/group_std_mean": 0.20439959168434144, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_20/centered_abs_mean": 0.1544642448425293, "signal/frontier_coverage_20/group_std_mean": 0.20439959168434144, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_25/centered_abs_mean": 0.1544642448425293, "signal/frontier_coverage_25/group_std_mean": 0.20439959168434144, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_5/centered_abs_mean": 0.1544642448425293, "signal/frontier_coverage_5/group_std_mean": 0.20439959168434144, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027649099007248878, "signal/frontier_ece_reward/centered_abs_mean": 0.02174353301525116, "signal/frontier_ece_reward/group_std_mean": 0.02732553631067276, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002717941626906395, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002717941626906395, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.5706461595447073, "eval_calibration/batch_distribution_entropy": 0.8726977897672046, "eval_calibration/buffer_distribution_entropy": 0.9393171128839451, "eval_calibration/confidence_entropy": 0.4035573482539014, "eval_calibration/coverage@0%": 0.015625, "eval_calibration/coverage@1%": 0.015625, "eval_calibration/coverage@10%": 0.015625, "eval_calibration/coverage@15%": 0.015625, "eval_calibration/coverage@20%": 0.015625, "eval_calibration/coverage@25%": 0.015625, "eval_calibration/coverage@30%": 0.015625, "eval_calibration/coverage@5%": 0.015625, "eval_calibration/ece": 0.24265838290022357, "eval_calibration/mean_confidence": 0.556418300597109, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 330.0, "eval_completions/max_terminated_length": 330.0, "eval_completions/mean_length": 171.14369201660156, "eval_completions/mean_terminated_length": 171.14369201660156, "eval_completions/min_length": 96.0, "eval_completions/min_terminated_length": 96.0, "eval_loss": 0.0, "eval_num_tokens": 334851807.0, "eval_reward": 0.940795511007309, "eval_reward_std": 0.23958701640367508, "eval_rewards/accuracy_reward": 0.423828125, "eval_rewards/brier_reward": 0.7562253475189209, "eval_rewards/confidence_uniqueness_reward": 0.89501953125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.00461793364956975, "eval_rewards/frontier_coverage_1": 0.19424864649772644, "eval_rewards/frontier_coverage_10": 0.19424864649772644, "eval_rewards/frontier_coverage_15": 0.19424864649772644, "eval_rewards/frontier_coverage_20": 0.19424864649772644, "eval_rewards/frontier_coverage_25": 0.19424864649772644, "eval_rewards/frontier_coverage_5": 0.19424864649772644, "eval_rewards/frontier_ece_reward": 0.013569748029112816, "eval_runtime": 9.8344, "eval_samples_per_second": 50.842, "eval_signal/accuracy_reward/centered_abs_mean": 0.4708251953125, "eval_signal/accuracy_reward/group_std_mean": 0.4925154745578766, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23541259765625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23541259765625, "eval_signal/advantage_abs_mean": 0.21671650558710098, "eval_signal/advantage_pre_scale_abs_mean": 0.21671650558710098, "eval_signal/advantage_pre_scale_std": 0.23654372990131378, "eval_signal/advantage_std": 0.23654372990131378, "eval_signal/brier_reward/centered_abs_mean": 0.2694649398326874, "eval_signal/brier_reward/group_std_mean": 0.32334399223327637, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03368311747908592, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.03368311747908592, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0469818115234375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.056731242686510086, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0058727264404296875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0058727264404296875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005505842389538884, "eval_signal/frontier_aurc_reward/group_std_mean": 0.009029718115925789, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.855458120000549e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.855458120000549e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3532957285642624, "eval_signal/frontier_coverage_1/group_std_mean": 0.44448477029800415, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3532957285642624, "eval_signal/frontier_coverage_10/group_std_mean": 0.44448477029800415, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3532957285642624, "eval_signal/frontier_coverage_15/group_std_mean": 0.44448477029800415, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3532957285642624, "eval_signal/frontier_coverage_20/group_std_mean": 0.44448477029800415, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3532957285642624, "eval_signal/frontier_coverage_25/group_std_mean": 0.44448477029800415, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3532957285642624, "eval_signal/frontier_coverage_5/group_std_mean": 0.44448477029800415, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006323992973193526, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.033544719219207764, "eval_signal/frontier_ece_reward/group_std_mean": 0.04166281037032604, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0041930899024009705, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0041930899024009705, "eval_steps_per_second": 0.203, "step": 100 }, { "epoch": 0.32, "step": 100, "train_probe_calibration/aurc": 0.19455582700187118, "train_probe_calibration/batch_distribution_entropy": 0.8136888433586318, "train_probe_calibration/buffer_distribution_entropy": 0.939399714843496, "train_probe_calibration/confidence_entropy": 0.3994395901362911, "train_probe_calibration/coverage@0%": 0.125, "train_probe_calibration/coverage@1%": 0.125, "train_probe_calibration/coverage@10%": 0.296875, "train_probe_calibration/coverage@15%": 0.5625, "train_probe_calibration/coverage@20%": 0.609375, "train_probe_calibration/coverage@25%": 0.84375, "train_probe_calibration/coverage@30%": 0.953125, "train_probe_calibration/coverage@5%": 0.125, "train_probe_calibration/ece": 0.17050834345107496, "train_probe_calibration/mean_confidence": 0.6579534095658197, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 360.5, "train_probe_completions/max_terminated_length": 360.5, "train_probe_completions/mean_length": 168.69681549072266, "train_probe_completions/mean_terminated_length": 168.69681549072266, "train_probe_completions/min_length": 83.5, "train_probe_completions/min_terminated_length": 83.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 334851807.0, "train_probe_reward": 1.0333038568496704, "train_probe_reward_std": 0.23331268876791, "train_probe_rewards/accuracy_reward": 0.61328125, "train_probe_rewards/brier_reward": 0.8115493357181549, "train_probe_rewards/confidence_uniqueness_reward": 0.89208984375, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0020465875859372318, "train_probe_rewards/frontier_coverage_1": 0.10023730993270874, "train_probe_rewards/frontier_coverage_10": 0.10023730993270874, "train_probe_rewards/frontier_coverage_15": 0.10023730993270874, "train_probe_rewards/frontier_coverage_20": 0.10023730993270874, "train_probe_rewards/frontier_coverage_25": 0.10023730993270874, "train_probe_rewards/frontier_coverage_5": 0.10023730993270874, "train_probe_rewards/frontier_ece_reward": 0.023835722357034683, "train_probe_runtime": 9.3482, "train_probe_samples_per_second": 53.486, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.462158203125, "train_probe_signal/accuracy_reward/group_std_mean": 0.48812438547611237, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2310791015625, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2310791015625, "train_probe_signal/advantage_abs_mean": 0.2127402350306511, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2127402350306511, "train_probe_signal/advantage_pre_scale_std": 0.23030224442481995, "train_probe_signal/advantage_std": 0.23030224442481995, "train_probe_signal/brier_reward/centered_abs_mean": 0.22354336827993393, "train_probe_signal/brier_reward/group_std_mean": 0.28484727442264557, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02794292103499174, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.02794292103499174, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.046661376953125, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05584513582289219, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005832672119140625, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005832672119140625, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0033606411889195442, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.005513262702152133, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.0155478422529995e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.0155478422529995e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30417926609516144, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.42205144464969635, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30417926609516144, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.42205144464969635, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.30417926609516144, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.42205144464969635, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.30417926609516144, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.42205144464969635, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.30417926609516144, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.42205144464969635, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30417926609516144, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.42205144464969635, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005444808630272746, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.03267330303788185, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.040300922468304634, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004084162879735231, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004084162879735231, "train_probe_steps_per_second": 0.214 }, { "calibration/aurc": 0.2704139956053221, "calibration/batch_distribution_entropy": 0.9040743573140506, "calibration/buffer_distribution_entropy": 0.9399021081106627, "calibration/confidence_entropy": 0.40396196903365356, "calibration/coverage@0%": 0.0171875, "calibration/coverage@1%": 0.0171875, "calibration/coverage@10%": 0.18515625, "calibration/coverage@15%": 0.44453125, "calibration/coverage@20%": 0.53671875, "calibration/coverage@25%": 0.58203125, "calibration/coverage@30%": 0.65078125, "calibration/coverage@5%": 0.10859375, "calibration/ece": 0.1906777345961955, "calibration/mean_confidence": 0.5680050226904052, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 638.8, "completions/max_terminated_length": 410.2, "completions/mean_length": 168.918359375, "completions/mean_terminated_length": 168.7849609375, "completions/min_length": 75.2, "completions/min_terminated_length": 75.2, "epoch": 0.336, "grad_norm": 0.0018242798978462815, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 351303963.0, "reward": 1.0275990962982178, "reward_std": 0.09201982617378235, "rewards/accuracy_reward": 0.58984375, "rewards/brier_reward": 0.8076816439628601, "rewards/confidence_uniqueness_reward": 0.9373004913330079, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002289101597853005, "rewards/frontier_coverage_1": 0.11363897696137429, "rewards/frontier_coverage_10": 0.11363897696137429, "rewards/frontier_coverage_15": 0.11363897696137429, "rewards/frontier_coverage_20": 0.11363897696137429, "rewards/frontier_coverage_25": 0.11384689658880234, "rewards/frontier_coverage_5": 0.11363897696137429, "rewards/frontier_ece_reward": 0.020657552778720854, "signal/accuracy_reward/centered_abs_mean": 0.10953369140625, "signal/accuracy_reward/group_std_mean": 0.15012021660804747, "signal/accuracy_reward/group_zero_std_frac": 0.55, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054766845703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.054766845703125, "signal/advantage_abs_mean": 0.06871124505996704, "signal/advantage_pre_scale_abs_mean": 0.06871124505996704, "signal/advantage_pre_scale_std": 0.11801368445158004, "signal/advantage_std": 0.11801368445158004, "signal/brier_reward/centered_abs_mean": 0.14712486565113067, "signal/brier_reward/group_std_mean": 0.1890992045402527, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018390608206391334, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018390608206391334, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030815805494785308, "signal/confidence_uniqueness_reward/group_std_mean": 0.039999409765005114, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0038519756868481635, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038519756868481635, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086068242787, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002245573024265468, "signal/frontier_aurc_reward/group_std_mean": 0.0035346172749996184, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.019575717393309e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.019575717393309e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1706451177597046, "signal/frontier_coverage_1/group_std_mean": 0.22382004261016847, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_10/centered_abs_mean": 0.1706451177597046, "signal/frontier_coverage_10/group_std_mean": 0.22382004261016847, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_15/centered_abs_mean": 0.1706451177597046, "signal/frontier_coverage_15/group_std_mean": 0.22382004261016847, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_20/centered_abs_mean": 0.1706451177597046, "signal/frontier_coverage_20/group_std_mean": 0.22382004261016847, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_25/centered_abs_mean": 0.16888906955718994, "signal/frontier_coverage_25/group_std_mean": 0.22164588570594787, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030231142416596414, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030231142416596414, "signal/frontier_coverage_5/centered_abs_mean": 0.1706451177597046, "signal/frontier_coverage_5/group_std_mean": 0.22382004261016847, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030545474495738746, "signal/frontier_ece_reward/centered_abs_mean": 0.02038377448916435, "signal/frontier_ece_reward/group_std_mean": 0.025580647960305215, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002547971811145544, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002547971811145544, "step": 105 }, { "calibration/aurc": 0.24142044817482367, "calibration/batch_distribution_entropy": 0.8656001475514318, "calibration/buffer_distribution_entropy": 0.9409112708199789, "calibration/confidence_entropy": 0.3802200750041355, "calibration/coverage@0%": 0.04068321078431373, "calibration/coverage@1%": 0.04068321078431373, "calibration/coverage@10%": 0.2774356617647059, "calibration/coverage@15%": 0.40088541666666666, "calibration/coverage@20%": 0.49231617647058823, "calibration/coverage@25%": 0.6063909313725491, "calibration/coverage@30%": 0.6743841911764706, "calibration/coverage@5%": 0.17272365196078432, "calibration/ece": 0.08609408255888487, "calibration/mean_confidence": 0.5179847446127612, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 821.2, "completions/max_terminated_length": 582.6, "completions/mean_length": 169.8603515625, "completions/mean_terminated_length": 169.72701416015624, "completions/min_length": 78.2, "completions/min_terminated_length": 78.2, "epoch": 0.352, "grad_norm": 0.002221801085397601, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 368303749.0, "reward": 0.9986960291862488, "reward_std": 0.0819695919752121, "rewards/accuracy_reward": 0.52705078125, "rewards/brier_reward": 0.7985443472862244, "rewards/confidence_uniqueness_reward": 0.9362337708473205, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0025407387875020504, "rewards/frontier_coverage_1": 0.15299645960330963, "rewards/frontier_coverage_10": 0.15299645960330963, "rewards/frontier_coverage_15": 0.15299645960330963, "rewards/frontier_coverage_20": 0.15299645960330963, "rewards/frontier_coverage_25": 0.14608888924121857, "rewards/frontier_coverage_5": 0.15299645960330963, "rewards/frontier_ece_reward": 0.017266629636287688, "signal/accuracy_reward/centered_abs_mean": 0.095184326171875, "signal/accuracy_reward/group_std_mean": 0.1254624456167221, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0475921630859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0475921630859375, "signal/advantage_abs_mean": 0.06207955777645111, "signal/advantage_pre_scale_abs_mean": 0.06207955777645111, "signal/advantage_pre_scale_std": 0.10653006732463836, "signal/advantage_std": 0.10653006732463836, "signal/brier_reward/centered_abs_mean": 0.1445058435201645, "signal/brier_reward/group_std_mean": 0.1861796945333481, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01806323044002056, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01806323044002056, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030342183634638788, "signal/confidence_uniqueness_reward/group_std_mean": 0.0386995404958725, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037927729543298485, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037927729543298485, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001989635010249913, "signal/frontier_aurc_reward/group_std_mean": 0.0031029653735458853, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.561446574167349e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.561446574167349e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17928497791290282, "signal/frontier_coverage_1/group_std_mean": 0.23090406954288484, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_10/centered_abs_mean": 0.17928497791290282, "signal/frontier_coverage_10/group_std_mean": 0.23090406954288484, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_15/centered_abs_mean": 0.17928497791290282, "signal/frontier_coverage_15/group_std_mean": 0.23090406954288484, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_20/centered_abs_mean": 0.17928497791290282, "signal/frontier_coverage_20/group_std_mean": 0.23090406954288484, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_25/centered_abs_mean": 0.17467791438102723, "signal/frontier_coverage_25/group_std_mean": 0.22531512677669524, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031267345417290925, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031267345417290925, "signal/frontier_coverage_5/centered_abs_mean": 0.17928497791290282, "signal/frontier_coverage_5/group_std_mean": 0.23090406954288484, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032092009671032427, "signal/frontier_ece_reward/centered_abs_mean": 0.018327732756733894, "signal/frontier_ece_reward/group_std_mean": 0.02301064059138298, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022909665945917367, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022909665945917367, "step": 110 }, { "calibration/aurc": 0.25086900973025095, "calibration/batch_distribution_entropy": 0.9186404347150997, "calibration/buffer_distribution_entropy": 0.9416779594893804, "calibration/confidence_entropy": 0.4035477266164838, "calibration/coverage@0%": 0.03438112745098039, "calibration/coverage@1%": 0.03438112745098039, "calibration/coverage@10%": 0.1820373774509804, "calibration/coverage@15%": 0.21797487745098038, "calibration/coverage@20%": 0.4117984068627451, "calibration/coverage@25%": 0.5314705882352941, "calibration/coverage@30%": 0.6292800245098039, "calibration/coverage@5%": 0.16172487745098038, "calibration/ece": 0.15323516031703535, "calibration/mean_confidence": 0.5679697731010848, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 904.2, "completions/max_terminated_length": 493.4, "completions/mean_length": 169.8033203125, "completions/mean_terminated_length": 169.5369659423828, "completions/min_length": 76.8, "completions/min_terminated_length": 76.8, "epoch": 0.368, "grad_norm": 0.0022946952376514673, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 385108007.0, "reward": 1.0292543292045593, "reward_std": 0.0754001870751381, "rewards/accuracy_reward": 0.58623046875, "rewards/brier_reward": 0.8222095847129822, "rewards/confidence_uniqueness_reward": 0.9371579766273499, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0019215317443013191, "rewards/frontier_coverage_1": 0.13132742196321487, "rewards/frontier_coverage_10": 0.13132742196321487, "rewards/frontier_coverage_15": 0.13132742196321487, "rewards/frontier_coverage_20": 0.13132742196321487, "rewards/frontier_coverage_25": 0.12279371917247772, "rewards/frontier_coverage_5": 0.13132742196321487, "rewards/frontier_ece_reward": 0.01957782618701458, "signal/accuracy_reward/centered_abs_mean": 0.084307861328125, "signal/accuracy_reward/group_std_mean": 0.11866024732589722, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421539306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0421539306640625, "signal/advantage_abs_mean": 0.05573421791195869, "signal/advantage_pre_scale_abs_mean": 0.05573421791195869, "signal/advantage_pre_scale_std": 0.09996391981840133, "signal/advantage_std": 0.09996391981840133, "signal/brier_reward/centered_abs_mean": 0.131490296125412, "signal/brier_reward/group_std_mean": 0.17088458240032195, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0164362870156765, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0164362870156765, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02735428810119629, "signal/confidence_uniqueness_reward/group_std_mean": 0.034791599959135056, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003419286012649536, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003419286012649536, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017890902236104012, "signal/frontier_aurc_reward/group_std_mean": 0.00294273984618485, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.2024714892031624e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.2024714892031624e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16438928842544556, "signal/frontier_coverage_1/group_std_mean": 0.21578606963157654, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_10/centered_abs_mean": 0.16438928842544556, "signal/frontier_coverage_10/group_std_mean": 0.21578606963157654, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_15/centered_abs_mean": 0.16438928842544556, "signal/frontier_coverage_15/group_std_mean": 0.21578606963157654, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_20/centered_abs_mean": 0.16438928842544556, "signal/frontier_coverage_20/group_std_mean": 0.21578606963157654, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_25/centered_abs_mean": 0.15546331703662872, "signal/frontier_coverage_25/group_std_mean": 0.2039874643087387, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0027827932965010403, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027827932965010403, "signal/frontier_coverage_5/centered_abs_mean": 0.16438928842544556, "signal/frontier_coverage_5/group_std_mean": 0.21578606963157654, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002942568203434348, "signal/frontier_ece_reward/centered_abs_mean": 0.01668607220053673, "signal/frontier_ece_reward/group_std_mean": 0.02099420689046383, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002085759025067091, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002085759025067091, "step": 115 }, { "calibration/aurc": 0.2703069251334617, "calibration/batch_distribution_entropy": 0.8990586414219536, "calibration/buffer_distribution_entropy": 0.9422862701184694, "calibration/confidence_entropy": 0.4024924860193864, "calibration/coverage@0%": 0.04765625, "calibration/coverage@1%": 0.04765625, "calibration/coverage@10%": 0.38125, "calibration/coverage@15%": 0.425, "calibration/coverage@20%": 0.4703125, "calibration/coverage@25%": 0.515625, "calibration/coverage@30%": 0.55546875, "calibration/coverage@5%": 0.23828125, "calibration/ece": 0.1594206011865718, "calibration/mean_confidence": 0.5080374241118275, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 729.8, "completions/max_terminated_length": 581.2, "completions/mean_length": 171.50654296875, "completions/mean_terminated_length": 171.239794921875, "completions/min_length": 76.6, "completions/min_terminated_length": 76.6, "epoch": 0.384, "grad_norm": 0.0017057686345651746, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 401720746.0, "reward": 1.030127477645874, "reward_std": 0.08005195558071136, "rewards/accuracy_reward": 0.5873046875, "rewards/brier_reward": 0.8276395082473755, "rewards/confidence_uniqueness_reward": 0.9362314462661743, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.001922252168878913, "rewards/frontier_coverage_1": 0.13151057362556456, "rewards/frontier_coverage_10": 0.13151057362556456, "rewards/frontier_coverage_15": 0.13151057362556456, "rewards/frontier_coverage_20": 0.13151057362556456, "rewards/frontier_coverage_25": 0.11768633276224136, "rewards/frontier_coverage_5": 0.13151057362556456, "rewards/frontier_ece_reward": 0.019144237600266935, "signal/accuracy_reward/centered_abs_mean": 0.09979248046875, "signal/accuracy_reward/group_std_mean": 0.13565291166305543, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049896240234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049896240234375, "signal/advantage_abs_mean": 0.058755910396575926, "signal/advantage_pre_scale_abs_mean": 0.058755910396575926, "signal/advantage_pre_scale_std": 0.10700914263725281, "signal/advantage_std": 0.10700914263725281, "signal/brier_reward/centered_abs_mean": 0.12325199693441391, "signal/brier_reward/group_std_mean": 0.16149061620235444, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015406499616801739, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015406499616801739, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027864859998226167, "signal/confidence_uniqueness_reward/group_std_mean": 0.036340619623661044, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003483107499778271, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003483107499778271, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017306852154433728, "signal/frontier_aurc_reward/group_std_mean": 0.002775628166273236, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.097926237387583e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.097926237387583e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15827414095401765, "signal/frontier_coverage_1/group_std_mean": 0.20665526986122132, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_10/centered_abs_mean": 0.15827414095401765, "signal/frontier_coverage_10/group_std_mean": 0.20665526986122132, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_15/centered_abs_mean": 0.15827414095401765, "signal/frontier_coverage_15/group_std_mean": 0.20665526986122132, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_20/centered_abs_mean": 0.15827414095401765, "signal/frontier_coverage_20/group_std_mean": 0.20665526986122132, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_25/centered_abs_mean": 0.13856834620237352, "signal/frontier_coverage_25/group_std_mean": 0.1815927118062973, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024803733453154565, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024803733453154565, "signal/frontier_coverage_5/centered_abs_mean": 0.15827414095401765, "signal/frontier_coverage_5/group_std_mean": 0.20665526986122132, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002833107067272067, "signal/frontier_ece_reward/centered_abs_mean": 0.015285241603851318, "signal/frontier_ece_reward/group_std_mean": 0.019187380746006965, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019106552004814147, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019106552004814147, "step": 120 }, { "calibration/aurc": 0.29008772776734093, "calibration/batch_distribution_entropy": 0.9157935092331382, "calibration/buffer_distribution_entropy": 0.9443153619361073, "calibration/confidence_entropy": 0.4131123080990579, "calibration/coverage@0%": 0.017981004901960786, "calibration/coverage@1%": 0.017981004901960786, "calibration/coverage@10%": 0.043762254901960784, "calibration/coverage@15%": 0.14454350490196077, "calibration/coverage@20%": 0.28598345588235297, "calibration/coverage@25%": 0.46881740196078436, "calibration/coverage@30%": 0.6245557598039216, "calibration/coverage@5%": 0.017981004901960786, "calibration/ece": 0.16912397182889852, "calibration/mean_confidence": 0.5452109849824474, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 633.8, "completions/max_terminated_length": 433.0, "completions/mean_length": 174.31162109375, "completions/mean_terminated_length": 174.17910766601562, "completions/min_length": 85.2, "completions/min_terminated_length": 85.2, "epoch": 0.4, "grad_norm": 0.0025587843265384436, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 418542145.0, "reward": 1.0214404821395875, "reward_std": 0.084488844871521, "rewards/accuracy_reward": 0.583984375, "rewards/brier_reward": 0.7968339323997498, "rewards/confidence_uniqueness_reward": 0.936758029460907, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0024314658250659702, "rewards/frontier_coverage_1": 0.10493959616869689, "rewards/frontier_coverage_10": 0.10493959616869689, "rewards/frontier_coverage_15": 0.10493959616869689, "rewards/frontier_coverage_20": 0.10493959616869689, "rewards/frontier_coverage_25": 0.0941769102588296, "rewards/frontier_coverage_5": 0.10493959616869689, "rewards/frontier_ece_reward": 0.01528221946209669, "signal/accuracy_reward/centered_abs_mean": 0.103076171875, "signal/accuracy_reward/group_std_mean": 0.14248399436473846, "signal/accuracy_reward/group_zero_std_frac": 0.56875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0515380859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0515380859375, "signal/advantage_abs_mean": 0.06206804737448692, "signal/advantage_pre_scale_abs_mean": 0.06206804737448692, "signal/advantage_pre_scale_std": 0.11141373813152314, "signal/advantage_std": 0.11141373813152314, "signal/brier_reward/centered_abs_mean": 0.1348109632730484, "signal/brier_reward/group_std_mean": 0.1749127984046936, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01685137040913105, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01685137040913105, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02733922004699707, "signal/confidence_uniqueness_reward/group_std_mean": 0.035421935841441154, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003417402505874634, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003417402505874634, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086068242787, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002029798785224557, "signal/frontier_aurc_reward/group_std_mean": 0.0032225903123617172, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.633339802036062e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.633339802036062e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15750395655632018, "signal/frontier_coverage_1/group_std_mean": 0.2061130702495575, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_10/centered_abs_mean": 0.15750395655632018, "signal/frontier_coverage_10/group_std_mean": 0.2061130702495575, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_15/centered_abs_mean": 0.15750395655632018, "signal/frontier_coverage_15/group_std_mean": 0.2061130702495575, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_20/centered_abs_mean": 0.15750395655632018, "signal/frontier_coverage_20/group_std_mean": 0.2061130702495575, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_25/centered_abs_mean": 0.12791687697172166, "signal/frontier_coverage_25/group_std_mean": 0.16793505549430848, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022897121030837297, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022897121030837297, "signal/frontier_coverage_5/centered_abs_mean": 0.15750395655632018, "signal/frontier_coverage_5/group_std_mean": 0.2061130702495575, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002819320699200034, "signal/frontier_ece_reward/centered_abs_mean": 0.015673490427434444, "signal/frontier_ece_reward/group_std_mean": 0.01953093260526657, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0019591863034293055, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0019591863034293055, "step": 125 }, { "calibration/aurc": 0.300496468657274, "calibration/batch_distribution_entropy": 0.860343677952838, "calibration/buffer_distribution_entropy": 0.9456375276771343, "calibration/confidence_entropy": 0.4292367542687396, "calibration/coverage@0%": 0.04453125, "calibration/coverage@1%": 0.04453125, "calibration/coverage@10%": 0.3421875, "calibration/coverage@15%": 0.47421875, "calibration/coverage@20%": 0.56484375, "calibration/coverage@25%": 0.61171875, "calibration/coverage@30%": 0.61875, "calibration/coverage@5%": 0.13828125, "calibration/ece": 0.1725572010194605, "calibration/mean_confidence": 0.5447724534313727, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 661.0, "completions/max_terminated_length": 436.8, "completions/mean_length": 177.3251953125, "completions/mean_terminated_length": 177.19320983886718, "completions/min_length": 80.6, "completions/min_terminated_length": 80.6, "epoch": 0.416, "grad_norm": 0.0018295373301953077, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 435239139.0, "reward": 1.0142476677894592, "reward_std": 0.07882001847028733, "rewards/accuracy_reward": 0.5599609375, "rewards/brier_reward": 0.8115284204483032, "rewards/confidence_uniqueness_reward": 0.9416091442108154, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.001915666786953807, "rewards/frontier_coverage_1": 0.12817499786615372, "rewards/frontier_coverage_10": 0.12817499786615372, "rewards/frontier_coverage_15": 0.12817499786615372, "rewards/frontier_coverage_20": 0.12817499786615372, "rewards/frontier_coverage_25": 0.11044178158044815, "rewards/frontier_coverage_5": 0.12817499786615372, "rewards/frontier_ece_reward": 0.014857827685773373, "signal/accuracy_reward/centered_abs_mean": 0.0997314453125, "signal/accuracy_reward/group_std_mean": 0.13128983080387116, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04986572265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04986572265625, "signal/advantage_abs_mean": 0.05995083674788475, "signal/advantage_pre_scale_abs_mean": 0.05995083674788475, "signal/advantage_pre_scale_std": 0.10547690689563752, "signal/advantage_std": 0.10547690689563752, "signal/brier_reward/centered_abs_mean": 0.12911611646413804, "signal/brier_reward/group_std_mean": 0.16568702459335327, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016139514558017255, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016139514558017255, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02463034950196743, "signal/confidence_uniqueness_reward/group_std_mean": 0.031486156210303304, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030787936877459286, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030787936877459286, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014531841035932303, "signal/frontier_aurc_reward/group_std_mean": 0.002290627988986671, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6011993395513854e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6011993395513854e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17003713846206664, "signal/frontier_coverage_1/group_std_mean": 0.21856652796268464, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_10/centered_abs_mean": 0.17003713846206664, "signal/frontier_coverage_10/group_std_mean": 0.21856652796268464, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_15/centered_abs_mean": 0.17003713846206664, "signal/frontier_coverage_15/group_std_mean": 0.21856652796268464, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_20/centered_abs_mean": 0.17003713846206664, "signal/frontier_coverage_20/group_std_mean": 0.21856652796268464, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_25/centered_abs_mean": 0.1356060341000557, "signal/frontier_coverage_25/group_std_mean": 0.17483413219451904, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002427347889170051, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002427347889170051, "signal/frontier_coverage_5/centered_abs_mean": 0.17003713846206664, "signal/frontier_coverage_5/group_std_mean": 0.21856652796268464, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030436647590249776, "signal/frontier_ece_reward/centered_abs_mean": 0.013920800760388374, "signal/frontier_ece_reward/group_std_mean": 0.017522389814257622, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017401000950485468, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017401000950485468, "step": 130 }, { "calibration/aurc": 0.1721532756826449, "calibration/batch_distribution_entropy": 0.8766754582035976, "calibration/buffer_distribution_entropy": 0.9459665615870213, "calibration/confidence_entropy": 0.3974590959603793, "calibration/coverage@0%": 0.10237132352941176, "calibration/coverage@1%": 0.16409007352941177, "calibration/coverage@10%": 0.5047549019607842, "calibration/coverage@15%": 0.5892555147058823, "calibration/coverage@20%": 0.6220955882352941, "calibration/coverage@25%": 0.6886182598039217, "calibration/coverage@30%": 0.7379197303921569, "calibration/coverage@5%": 0.44065257352941173, "calibration/ece": 0.15323011827988556, "calibration/mean_confidence": 0.6029602178207593, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 753.0, "completions/max_terminated_length": 553.2, "completions/mean_length": 176.35029296875, "completions/mean_terminated_length": 176.21759643554688, "completions/min_length": 84.8, "completions/min_terminated_length": 84.8, "epoch": 0.432, "grad_norm": 0.0018569445237517357, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 452059302.0, "reward": 1.0361051321029664, "reward_std": 0.07464597374200821, "rewards/accuracy_reward": 0.598828125, "rewards/brier_reward": 0.8299178600311279, "rewards/confidence_uniqueness_reward": 0.9411059260368347, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.001714168442413211, "rewards/frontier_coverage_1": 0.12789682820439338, "rewards/frontier_coverage_10": 0.12789682820439338, "rewards/frontier_coverage_15": 0.12789682820439338, "rewards/frontier_coverage_20": 0.12789682820439338, "rewards/frontier_coverage_25": 0.1091009445488453, "rewards/frontier_coverage_5": 0.12789682820439338, "rewards/frontier_ece_reward": 0.016333967633545398, "signal/accuracy_reward/centered_abs_mean": 0.09854736328125, "signal/accuracy_reward/group_std_mean": 0.12864942103624344, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049273681640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049273681640625, "signal/advantage_abs_mean": 0.05766047313809395, "signal/advantage_pre_scale_abs_mean": 0.05766047313809395, "signal/advantage_pre_scale_std": 0.10433387905359268, "signal/advantage_std": 0.10433387905359268, "signal/brier_reward/centered_abs_mean": 0.11633200347423553, "signal/brier_reward/group_std_mean": 0.14996844828128814, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014541500434279441, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014541500434279441, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025572020933032034, "signal/confidence_uniqueness_reward/group_std_mean": 0.03261452466249466, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031965026166290043, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031965026166290043, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014799919212237001, "signal/frontier_aurc_reward/group_std_mean": 0.0023754774127155544, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6491854441701435e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6491854441701435e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15234991312026977, "signal/frontier_coverage_1/group_std_mean": 0.19905296862125396, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_10/centered_abs_mean": 0.15234991312026977, "signal/frontier_coverage_10/group_std_mean": 0.19905296862125396, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_15/centered_abs_mean": 0.15234991312026977, "signal/frontier_coverage_15/group_std_mean": 0.19905296862125396, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_20/centered_abs_mean": 0.15234991312026977, "signal/frontier_coverage_20/group_std_mean": 0.19905296862125396, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_25/centered_abs_mean": 0.1211901381611824, "signal/frontier_coverage_25/group_std_mean": 0.15898216962814332, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021693034097552298, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021693034097552298, "signal/frontier_coverage_5/centered_abs_mean": 0.15234991312026977, "signal/frontier_coverage_5/group_std_mean": 0.19905296862125396, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002727063372731209, "signal/frontier_ece_reward/centered_abs_mean": 0.012844923511147499, "signal/frontier_ece_reward/group_std_mean": 0.01613148283213377, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016056154388934373, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016056154388934373, "step": 135 }, { "calibration/aurc": 0.25730294087394334, "calibration/batch_distribution_entropy": 0.8953334895238516, "calibration/buffer_distribution_entropy": 0.9452347981622419, "calibration/confidence_entropy": 0.40368902257402794, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.2546875, "calibration/coverage@15%": 0.32578125, "calibration/coverage@20%": 0.4125, "calibration/coverage@25%": 0.4640625, "calibration/coverage@30%": 0.67890625, "calibration/coverage@5%": 0.02109375, "calibration/ece": 0.1689719891195878, "calibration/mean_confidence": 0.6208841497585194, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 462.2, "completions/max_terminated_length": 462.2, "completions/mean_length": 182.95234375, "completions/mean_terminated_length": 182.95234375, "completions/min_length": 85.8, "completions/min_terminated_length": 85.8, "epoch": 0.448, "grad_norm": 0.0040541719645261765, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 468885534.0, "reward": 1.0192960262298585, "reward_std": 0.07691188901662827, "rewards/accuracy_reward": 0.56416015625, "rewards/brier_reward": 0.8227449178695678, "rewards/confidence_uniqueness_reward": 0.9410862445831298, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0022552535170689224, "rewards/frontier_coverage_1": 0.14381106197834015, "rewards/frontier_coverage_10": 0.14381106197834015, "rewards/frontier_coverage_15": 0.14381106197834015, "rewards/frontier_coverage_20": 0.14381106197834015, "rewards/frontier_coverage_25": 0.12079337984323502, "rewards/frontier_coverage_5": 0.14381106197834015, "rewards/frontier_ece_reward": 0.014734631776809693, "signal/accuracy_reward/centered_abs_mean": 0.091912841796875, "signal/accuracy_reward/group_std_mean": 0.12225985080003739, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459564208984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0459564208984375, "signal/advantage_abs_mean": 0.057493841648101805, "signal/advantage_pre_scale_abs_mean": 0.057493841648101805, "signal/advantage_pre_scale_std": 0.10439873188734054, "signal/advantage_std": 0.10439873188734054, "signal/brier_reward/centered_abs_mean": 0.1224316492676735, "signal/brier_reward/group_std_mean": 0.1602381944656372, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015303956158459187, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015303956158459187, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026432880386710166, "signal/confidence_uniqueness_reward/group_std_mean": 0.03415291607379913, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033041100483387708, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033041100483387708, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001860675076022744, "signal/frontier_aurc_reward/group_std_mean": 0.003051386307924986, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.330608233227394e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.330608233227394e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15158057063817978, "signal/frontier_coverage_1/group_std_mean": 0.19816445112228392, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_10/centered_abs_mean": 0.15158057063817978, "signal/frontier_coverage_10/group_std_mean": 0.19816445112228392, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_15/centered_abs_mean": 0.15158057063817978, "signal/frontier_coverage_15/group_std_mean": 0.19816445112228392, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_20/centered_abs_mean": 0.15158057063817978, "signal/frontier_coverage_20/group_std_mean": 0.19816445112228392, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_25/centered_abs_mean": 0.11687376201152802, "signal/frontier_coverage_25/group_std_mean": 0.15403735041618347, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020920401671901344, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020920401671901344, "signal/frontier_coverage_5/centered_abs_mean": 0.15158057063817978, "signal/frontier_coverage_5/group_std_mean": 0.19816445112228392, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002713292092084885, "signal/frontier_ece_reward/centered_abs_mean": 0.013030365109443665, "signal/frontier_ece_reward/group_std_mean": 0.01642268504947424, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001628795638680458, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001628795638680458, "step": 140 }, { "calibration/aurc": 0.31054807919449445, "calibration/batch_distribution_entropy": 0.8982943819177818, "calibration/buffer_distribution_entropy": 0.9453990493361039, "calibration/confidence_entropy": 0.4392308925461128, "calibration/coverage@0%": 0.010159313725490195, "calibration/coverage@1%": 0.010159313725490195, "calibration/coverage@10%": 0.11797181372549019, "calibration/coverage@15%": 0.1523468137254902, "calibration/coverage@20%": 0.2476593137254902, "calibration/coverage@25%": 0.2921905637254902, "calibration/coverage@30%": 0.5638878676470588, "calibration/coverage@5%": 0.010159313725490195, "calibration/ece": 0.16542806795759996, "calibration/mean_confidence": 0.6450125473590109, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1155.6, "completions/max_terminated_length": 662.2, "completions/mean_length": 187.3685546875, "completions/mean_terminated_length": 186.973779296875, "completions/min_length": 88.8, "completions/min_terminated_length": 88.8, "epoch": 0.464, "grad_norm": 0.0020874959882348776, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 485975004.0, "reward": 0.9936848402023315, "reward_std": 0.07820483893156052, "rewards/accuracy_reward": 0.5205078125, "rewards/brier_reward": 0.7968811154365539, "rewards/confidence_uniqueness_reward": 0.934678053855896, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0026155672036111354, "rewards/frontier_coverage_1": 0.15401808321475982, "rewards/frontier_coverage_10": 0.15401808321475982, "rewards/frontier_coverage_15": 0.15401808321475982, "rewards/frontier_coverage_20": 0.15401808321475982, "rewards/frontier_coverage_25": 0.12326906770467758, "rewards/frontier_coverage_5": 0.15401808321475982, "rewards/frontier_ece_reward": 0.011458772234618664, "signal/accuracy_reward/centered_abs_mean": 0.084423828125, "signal/accuracy_reward/group_std_mean": 0.11666271984577178, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422119140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0422119140625, "signal/advantage_abs_mean": 0.05724867507815361, "signal/advantage_pre_scale_abs_mean": 0.05724867507815361, "signal/advantage_pre_scale_std": 0.10630969554185868, "signal/advantage_std": 0.10630969554185868, "signal/brier_reward/centered_abs_mean": 0.12861161679029465, "signal/brier_reward/group_std_mean": 0.16630764305591583, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01607645209878683, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01607645209878683, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028835254535079003, "signal/confidence_uniqueness_reward/group_std_mean": 0.03829977139830589, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036044068168848754, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036044068168848754, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0019000403117388487, "signal/frontier_aurc_reward/group_std_mean": 0.0030668860767036677, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.40107213560259e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.40107213560259e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14896406829357148, "signal/frontier_coverage_1/group_std_mean": 0.19503563046455383, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_10/centered_abs_mean": 0.14896406829357148, "signal/frontier_coverage_10/group_std_mean": 0.19503563046455383, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_15/centered_abs_mean": 0.14896406829357148, "signal/frontier_coverage_15/group_std_mean": 0.19503563046455383, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_20/centered_abs_mean": 0.14896406829357148, "signal/frontier_coverage_20/group_std_mean": 0.19503563046455383, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_25/centered_abs_mean": 0.11530720740556717, "signal/frontier_coverage_25/group_std_mean": 0.15157280564308168, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020639989525079727, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020639989525079727, "signal/frontier_coverage_5/centered_abs_mean": 0.14896406829357148, "signal/frontier_coverage_5/group_std_mean": 0.19503563046455383, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002666456811130047, "signal/frontier_ece_reward/centered_abs_mean": 0.012446103803813457, "signal/frontier_ece_reward/group_std_mean": 0.015705187618732453, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001555762975476682, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001555762975476682, "step": 145 }, { "calibration/aurc": 0.24557281662898522, "calibration/batch_distribution_entropy": 0.8341399106383033, "calibration/buffer_distribution_entropy": 0.9449403760979717, "calibration/confidence_entropy": 0.3687400847573974, "calibration/coverage@0%": 0.065625, "calibration/coverage@1%": 0.065625, "calibration/coverage@10%": 0.22734375, "calibration/coverage@15%": 0.3375, "calibration/coverage@20%": 0.446875, "calibration/coverage@25%": 0.56953125, "calibration/coverage@30%": 0.74375, "calibration/coverage@5%": 0.14296875, "calibration/ece": 0.11169667756470028, "calibration/mean_confidence": 0.5292462022999398, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 911.8, "completions/max_terminated_length": 479.0, "completions/mean_length": 183.63916015625, "completions/mean_terminated_length": 183.241748046875, "completions/min_length": 88.6, "completions/min_terminated_length": 88.6, "epoch": 0.48, "grad_norm": 0.0024119976442307234, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 502903501.0, "reward": 1.0209609508514403, "reward_std": 0.08060411512851715, "rewards/accuracy_reward": 0.57626953125, "rewards/brier_reward": 0.8086855053901673, "rewards/confidence_uniqueness_reward": 0.9317057371139527, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0018240779172629118, "rewards/frontier_coverage_1": 0.1329729899764061, "rewards/frontier_coverage_10": 0.1329729899764061, "rewards/frontier_coverage_15": 0.1329729899764061, "rewards/frontier_coverage_20": 0.1329729899764061, "rewards/frontier_coverage_25": 0.10789064913988114, "rewards/frontier_coverage_5": 0.1329729899764061, "rewards/frontier_ece_reward": 0.012992727011442185, "signal/accuracy_reward/centered_abs_mean": 0.114581298828125, "signal/accuracy_reward/group_std_mean": 0.1539652705192566, "signal/accuracy_reward/group_zero_std_frac": 0.55, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0572906494140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0572906494140625, "signal/advantage_abs_mean": 0.060080311447381976, "signal/advantage_pre_scale_abs_mean": 0.060080311447381976, "signal/advantage_pre_scale_std": 0.10676742047071457, "signal/advantage_std": 0.10676742047071457, "signal/brier_reward/centered_abs_mean": 0.13224513232707977, "signal/brier_reward/group_std_mean": 0.16912654638290406, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01653064154088497, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01653064154088497, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030292441695928575, "signal/confidence_uniqueness_reward/group_std_mean": 0.0388321079313755, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003786555211991072, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003786555211991072, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013736919732764362, "signal/frontier_aurc_reward/group_std_mean": 0.0021870420314371586, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.458908493281342e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.458908493281342e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18107914328575134, "signal/frontier_coverage_1/group_std_mean": 0.23383658230304719, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_10/centered_abs_mean": 0.18107914328575134, "signal/frontier_coverage_10/group_std_mean": 0.23383658230304719, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_15/centered_abs_mean": 0.18107914328575134, "signal/frontier_coverage_15/group_std_mean": 0.23383658230304719, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_20/centered_abs_mean": 0.18107914328575134, "signal/frontier_coverage_20/group_std_mean": 0.23383658230304719, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_25/centered_abs_mean": 0.13092263638973237, "signal/frontier_coverage_25/group_std_mean": 0.170234015583992, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002343515120446682, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002343515120446682, "signal/frontier_coverage_5/centered_abs_mean": 0.18107914328575134, "signal/frontier_coverage_5/group_std_mean": 0.23383658230304719, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003241316508501768, "signal/frontier_ece_reward/centered_abs_mean": 0.012281083315610886, "signal/frontier_ece_reward/group_std_mean": 0.015359072759747506, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015351354144513608, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015351354144513608, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.406549383364957, "eval_calibration/batch_distribution_entropy": 0.8909663454672565, "eval_calibration/buffer_distribution_entropy": 0.9446340496301424, "eval_calibration/confidence_entropy": 0.46453067903100087, "eval_calibration/coverage@0%": 0.078125, "eval_calibration/coverage@1%": 0.078125, "eval_calibration/coverage@10%": 0.078125, "eval_calibration/coverage@15%": 0.109375, "eval_calibration/coverage@20%": 0.109375, "eval_calibration/coverage@25%": 0.140625, "eval_calibration/coverage@30%": 0.21875, "eval_calibration/coverage@5%": 0.078125, "eval_calibration/ece": 0.1869676292547091, "eval_calibration/mean_confidence": 0.5198574552595667, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 451.5, "eval_completions/max_terminated_length": 451.5, "eval_completions/mean_length": 184.18531799316406, "eval_completions/mean_terminated_length": 184.18531799316406, "eval_completions/min_length": 94.5, "eval_completions/min_terminated_length": 94.5, "eval_loss": 0.0, "eval_num_tokens": 502903501.0, "eval_reward": 0.9559306502342224, "eval_reward_std": 0.22658731788396835, "eval_rewards/accuracy_reward": 0.443359375, "eval_rewards/brier_reward": 0.7953170835971832, "eval_rewards/confidence_uniqueness_reward": 0.896484375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0025530497077852488, "eval_rewards/frontier_coverage_1": 0.20948684215545654, "eval_rewards/frontier_coverage_10": 0.20948684215545654, "eval_rewards/frontier_coverage_15": 0.20948684215545654, "eval_rewards/frontier_coverage_20": 0.20948684215545654, "eval_rewards/frontier_coverage_25": 0.1534598395228386, "eval_rewards/frontier_coverage_5": 0.20948684215545654, "eval_rewards/frontier_ece_reward": 0.010603584349155426, "eval_runtime": 11.305, "eval_samples_per_second": 44.228, "eval_signal/accuracy_reward/centered_abs_mean": 0.4813232421875, "eval_signal/accuracy_reward/group_std_mean": 0.49823255836963654, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.24066162109375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.24066162109375, "eval_signal/advantage_abs_mean": 0.20942430198192596, "eval_signal/advantage_pre_scale_abs_mean": 0.20942430198192596, "eval_signal/advantage_pre_scale_std": 0.223799467086792, "eval_signal/advantage_std": 0.223799467086792, "eval_signal/brier_reward/centered_abs_mean": 0.21622556447982788, "eval_signal/brier_reward/group_std_mean": 0.2670576274394989, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027028195559978485, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.027028195559978485, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0438079833984375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05144248157739639, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054759979248046875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054759979248046875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002718214178457856, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004650075454264879, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.865603295911569e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.865603295911569e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3949373662471771, "eval_signal/frontier_coverage_1/group_std_mean": 0.4775615483522415, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3949373662471771, "eval_signal/frontier_coverage_10/group_std_mean": 0.4775615483522415, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3949373662471771, "eval_signal/frontier_coverage_15/group_std_mean": 0.4775615483522415, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3949373662471771, "eval_signal/frontier_coverage_20/group_std_mean": 0.4775615483522415, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2780953347682953, "eval_signal/frontier_coverage_25/group_std_mean": 0.34059378504753113, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004977906821295619, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004977906821295619, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3949373662471771, "eval_signal/frontier_coverage_5/group_std_mean": 0.4775615483522415, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00706937862560153, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.017484422773122787, "eval_signal/frontier_ece_reward/group_std_mean": 0.021346506662666798, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021855528466403484, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021855528466403484, "eval_steps_per_second": 0.177, "step": 150 }, { "epoch": 0.48, "step": 150, "train_probe_calibration/aurc": 0.15857725846094667, "train_probe_calibration/batch_distribution_entropy": 0.9130259925773516, "train_probe_calibration/buffer_distribution_entropy": 0.944713675838337, "train_probe_calibration/confidence_entropy": 0.4207142744029133, "train_probe_calibration/coverage@0%": 0.125, "train_probe_calibration/coverage@1%": 0.125, "train_probe_calibration/coverage@10%": 0.5, "train_probe_calibration/coverage@15%": 0.65625, "train_probe_calibration/coverage@20%": 0.75, "train_probe_calibration/coverage@25%": 0.828125, "train_probe_calibration/coverage@30%": 0.875, "train_probe_calibration/coverage@5%": 0.390625, "train_probe_calibration/ece": 0.20172079164661832, "train_probe_calibration/mean_confidence": 0.5198955010753575, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 321.5, "train_probe_completions/max_terminated_length": 321.5, "train_probe_completions/mean_length": 178.07261657714844, "train_probe_completions/mean_terminated_length": 178.07261657714844, "train_probe_completions/min_length": 96.0, "train_probe_completions/min_terminated_length": 96.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 502903501.0, "train_probe_reward": 1.0524759888648987, "train_probe_reward_std": 0.20973487198352814, "train_probe_rewards/accuracy_reward": 0.654296875, "train_probe_rewards/brier_reward": 0.8249536752700806, "train_probe_rewards/confidence_uniqueness_reward": 0.891845703125, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0013824773486703634, "train_probe_rewards/frontier_coverage_1": 0.0867544673383236, "train_probe_rewards/frontier_coverage_10": 0.0867544673383236, "train_probe_rewards/frontier_coverage_15": 0.0867544673383236, "train_probe_rewards/frontier_coverage_20": 0.0867544673383236, "train_probe_rewards/frontier_coverage_25": 0.06929008662700653, "train_probe_rewards/frontier_coverage_5": 0.0867544673383236, "train_probe_rewards/frontier_ece_reward": 0.01398058095946908, "train_probe_runtime": 8.5257, "train_probe_samples_per_second": 58.646, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4410400390625, "train_probe_signal/accuracy_reward/group_std_mean": 0.4765031486749649, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22052001953125, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22052001953125, "train_probe_signal/advantage_abs_mean": 0.18870525062084198, "train_probe_signal/advantage_pre_scale_abs_mean": 0.18870525062084198, "train_probe_signal/advantage_pre_scale_std": 0.20722128450870514, "train_probe_signal/advantage_std": 0.20722128450870514, "train_probe_signal/brier_reward/centered_abs_mean": 0.1941361352801323, "train_probe_signal/brier_reward/group_std_mean": 0.2597276568412781, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024267016910016537, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.024267016910016537, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047119140625, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05483095906674862, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005889892578125, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005889892578125, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0020549558103084564, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0037329471670091152, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6783709219889715e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6783709219889715e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3552343100309372, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.4747858941555023, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3552343100309372, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.4747858941555023, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3552343100309372, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.4747858941555023, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3552343100309372, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.4747858941555023, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.24588338285684586, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.33837637305259705, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004401312442496419, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004401312442496419, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3552343100309372, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.4747858941555023, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006358693819493055, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.017886138521134853, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.021670137532055378, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0022357673151418567, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0022357673151418567, "train_probe_steps_per_second": 0.235 }, { "calibration/aurc": 0.36937569815613946, "calibration/batch_distribution_entropy": 0.8804409015073205, "calibration/buffer_distribution_entropy": 0.9450503390407532, "calibration/confidence_entropy": 0.405562550806981, "calibration/coverage@0%": 0.078125, "calibration/coverage@1%": 0.078125, "calibration/coverage@10%": 0.14140625, "calibration/coverage@15%": 0.15390625, "calibration/coverage@20%": 0.18828125, "calibration/coverage@25%": 0.3640625, "calibration/coverage@30%": 0.44765625, "calibration/coverage@5%": 0.1078125, "calibration/ece": 0.15152305063456092, "calibration/mean_confidence": 0.5187107282621319, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 678.6, "completions/max_terminated_length": 502.6, "completions/mean_length": 184.14736328125, "completions/mean_terminated_length": 184.01534423828124, "completions/min_length": 83.6, "completions/min_terminated_length": 83.6, "epoch": 0.496, "grad_norm": 0.005962767172604799, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 520097010.0, "reward": 1.044260597229004, "reward_std": 0.0687633216381073, "rewards/accuracy_reward": 0.6216796875, "rewards/brier_reward": 0.8228810787200928, "rewards/confidence_uniqueness_reward": 0.942884886264801, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0015023096697404982, "rewards/frontier_coverage_1": 0.1085168793797493, "rewards/frontier_coverage_10": 0.1085168793797493, "rewards/frontier_coverage_15": 0.1085168793797493, "rewards/frontier_coverage_20": 0.1085168793797493, "rewards/frontier_coverage_25": 0.08267375081777573, "rewards/frontier_coverage_5": 0.1085168793797493, "rewards/frontier_ece_reward": 0.01266906913369894, "signal/accuracy_reward/centered_abs_mean": 0.08406982421875, "signal/accuracy_reward/group_std_mean": 0.11525466293096542, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042034912109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042034912109375, "signal/advantage_abs_mean": 0.051340526342391966, "signal/advantage_pre_scale_abs_mean": 0.051340526342391966, "signal/advantage_pre_scale_std": 0.0964614674448967, "signal/advantage_std": 0.0964614674448967, "signal/brier_reward/centered_abs_mean": 0.11293443143367768, "signal/brier_reward/group_std_mean": 0.1477597326040268, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01411680392920971, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01411680392920971, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02416303977370262, "signal/confidence_uniqueness_reward/group_std_mean": 0.030411677807569502, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030203799717128275, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030203799717128275, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.001119971019215882, "signal/frontier_aurc_reward/group_std_mean": 0.0018046426121145487, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0047481302754023e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0047481302754023e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1459894895553589, "signal/frontier_coverage_1/group_std_mean": 0.1935875177383423, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_10/centered_abs_mean": 0.1459894895553589, "signal/frontier_coverage_10/group_std_mean": 0.1935875177383423, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_15/centered_abs_mean": 0.1459894895553589, "signal/frontier_coverage_15/group_std_mean": 0.1935875177383423, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_20/centered_abs_mean": 0.1459894895553589, "signal/frontier_coverage_20/group_std_mean": 0.1935875177383423, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_25/centered_abs_mean": 0.101905357837677, "signal/frontier_coverage_25/group_std_mean": 0.135857430100441, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018241058802232145, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018241058802232145, "signal/frontier_coverage_5/centered_abs_mean": 0.1459894895553589, "signal/frontier_coverage_5/group_std_mean": 0.1935875177383423, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026132117491215467, "signal/frontier_ece_reward/centered_abs_mean": 0.011225111037492751, "signal/frontier_ece_reward/group_std_mean": 0.014030049927532673, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001403138879686594, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001403138879686594, "step": 155 }, { "calibration/aurc": 0.19729564683186535, "calibration/batch_distribution_entropy": 0.9365230444547216, "calibration/buffer_distribution_entropy": 0.9458546354861728, "calibration/confidence_entropy": 0.4251162815208058, "calibration/coverage@0%": 0.05625, "calibration/coverage@1%": 0.05625, "calibration/coverage@10%": 0.409375, "calibration/coverage@15%": 0.459375, "calibration/coverage@20%": 0.559375, "calibration/coverage@25%": 0.634375, "calibration/coverage@30%": 0.828125, "calibration/coverage@5%": 0.1625, "calibration/ece": 0.1667415023265532, "calibration/mean_confidence": 0.4842607593122642, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 893.0, "completions/max_terminated_length": 456.8, "completions/mean_length": 183.923828125, "completions/mean_terminated_length": 183.6598693847656, "completions/min_length": 90.2, "completions/min_terminated_length": 90.2, "epoch": 0.512, "grad_norm": 0.0023144185543060303, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 537126054.0, "reward": 1.0360616207122804, "reward_std": 0.07599924206733703, "rewards/accuracy_reward": 0.6048828125, "rewards/brier_reward": 0.8223283767700196, "rewards/confidence_uniqueness_reward": 0.9420121669769287, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.001600394258275628, "rewards/frontier_coverage_1": 0.1132353588938713, "rewards/frontier_coverage_10": 0.1132353588938713, "rewards/frontier_coverage_15": 0.1132353588938713, "rewards/frontier_coverage_20": 0.1132353588938713, "rewards/frontier_coverage_25": 0.08508779406547547, "rewards/frontier_coverage_5": 0.1132353588938713, "rewards/frontier_ece_reward": 0.012370448373258115, "signal/accuracy_reward/centered_abs_mean": 0.0976318359375, "signal/accuracy_reward/group_std_mean": 0.13105546683073044, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04881591796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04881591796875, "signal/advantage_abs_mean": 0.05740831717848778, "signal/advantage_pre_scale_abs_mean": 0.05740831717848778, "signal/advantage_pre_scale_std": 0.10611572861671448, "signal/advantage_std": 0.10611572861671448, "signal/brier_reward/centered_abs_mean": 0.1118384689092636, "signal/brier_reward/group_std_mean": 0.1458802491426468, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01397980861365795, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01397980861365795, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024859635904431344, "signal/confidence_uniqueness_reward/group_std_mean": 0.031609703600406644, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003107454488053918, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003107454488053918, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012359362561255693, "signal/frontier_aurc_reward/group_std_mean": 0.001959600206464529, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.212325871369103e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.212325871369103e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14064022451639174, "signal/frontier_coverage_1/group_std_mean": 0.18756941258907317, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_10/centered_abs_mean": 0.14064022451639174, "signal/frontier_coverage_10/group_std_mean": 0.18756941258907317, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_15/centered_abs_mean": 0.14064022451639174, "signal/frontier_coverage_15/group_std_mean": 0.18756941258907317, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_20/centered_abs_mean": 0.14064022451639174, "signal/frontier_coverage_20/group_std_mean": 0.18756941258907317, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_25/centered_abs_mean": 0.09089281260967255, "signal/frontier_coverage_25/group_std_mean": 0.12226969897747039, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016269813058897853, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016269813058897853, "signal/frontier_coverage_5/centered_abs_mean": 0.14064022451639174, "signal/frontier_coverage_5/group_std_mean": 0.18756941258907317, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002517459914088249, "signal/frontier_ece_reward/centered_abs_mean": 0.010960309766232967, "signal/frontier_ece_reward/group_std_mean": 0.01376073807477951, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001370038720779121, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001370038720779121, "step": 160 }, { "calibration/aurc": 0.10147454873978934, "calibration/batch_distribution_entropy": 0.8797335226096754, "calibration/buffer_distribution_entropy": 0.946214670936134, "calibration/confidence_entropy": 0.42908605137799893, "calibration/coverage@0%": 0.190625, "calibration/coverage@1%": 0.190625, "calibration/coverage@10%": 0.615625, "calibration/coverage@15%": 0.740625, "calibration/coverage@20%": 0.8328125, "calibration/coverage@25%": 0.8875, "calibration/coverage@30%": 0.9671875, "calibration/coverage@5%": 0.4078125, "calibration/ece": 0.15184928203773357, "calibration/mean_confidence": 0.6085124254121397, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 697.0, "completions/max_terminated_length": 519.8, "completions/mean_length": 186.444140625, "completions/mean_terminated_length": 186.31230773925782, "completions/min_length": 93.8, "completions/min_terminated_length": 93.8, "epoch": 0.528, "grad_norm": 0.0019444272620603442, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 554064778.0, "reward": 1.0289855241775512, "reward_std": 0.07206702530384064, "rewards/accuracy_reward": 0.58603515625, "rewards/brier_reward": 0.8264921069145202, "rewards/confidence_uniqueness_reward": 0.9401045680046082, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0016523070633411407, "rewards/frontier_coverage_1": 0.13403759896755219, "rewards/frontier_coverage_10": 0.13403759896755219, "rewards/frontier_coverage_15": 0.13403759896755219, "rewards/frontier_coverage_20": 0.13403759896755219, "rewards/frontier_coverage_25": 0.09650920405983925, "rewards/frontier_coverage_5": 0.13403759896755219, "rewards/frontier_ece_reward": 0.011983232945203781, "signal/accuracy_reward/centered_abs_mean": 0.096856689453125, "signal/accuracy_reward/group_std_mean": 0.13027185052633286, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0484283447265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0484283447265625, "signal/advantage_abs_mean": 0.05456642434000969, "signal/advantage_pre_scale_abs_mean": 0.05456642434000969, "signal/advantage_pre_scale_std": 0.10015368908643722, "signal/advantage_std": 0.10015368908643722, "signal/brier_reward/centered_abs_mean": 0.11027712374925613, "signal/brier_reward/group_std_mean": 0.14253330528736113, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013784640468657017, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013784640468657017, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025565633177757265, "signal/confidence_uniqueness_reward/group_std_mean": 0.03242117166519165, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003195704147219658, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003195704147219658, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011575968354009091, "signal/frontier_aurc_reward/group_std_mean": 0.001790312142111361, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0720982865896077e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0720982865896077e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15162838697433473, "signal/frontier_coverage_1/group_std_mean": 0.19694490134716033, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_10/centered_abs_mean": 0.15162838697433473, "signal/frontier_coverage_10/group_std_mean": 0.19694490134716033, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_15/centered_abs_mean": 0.15162838697433473, "signal/frontier_coverage_15/group_std_mean": 0.19694490134716033, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_20/centered_abs_mean": 0.15162838697433473, "signal/frontier_coverage_20/group_std_mean": 0.19694490134716033, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_25/centered_abs_mean": 0.09777042716741562, "signal/frontier_coverage_25/group_std_mean": 0.12786214500665666, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017500906018540264, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017500906018540264, "signal/frontier_coverage_5/centered_abs_mean": 0.15162838697433473, "signal/frontier_coverage_5/group_std_mean": 0.19694490134716033, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027141480706632136, "signal/frontier_ece_reward/centered_abs_mean": 0.00985901989042759, "signal/frontier_ece_reward/group_std_mean": 0.012534209899604321, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012323774863034487, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012323774863034487, "step": 165 }, { "calibration/aurc": 0.17559047792121124, "calibration/batch_distribution_entropy": 0.8843507468483722, "calibration/buffer_distribution_entropy": 0.9468903060077588, "calibration/confidence_entropy": 0.4229738822725511, "calibration/coverage@0%": 0.1265655637254902, "calibration/coverage@1%": 0.14765931372549018, "calibration/coverage@10%": 0.48027267156862746, "calibration/coverage@15%": 0.5803094362745098, "calibration/coverage@20%": 0.6623805147058823, "calibration/coverage@25%": 0.7264950980392156, "calibration/coverage@30%": 0.789828431372549, "calibration/coverage@5%": 0.2242218137254902, "calibration/ece": 0.11771892982943855, "calibration/mean_confidence": 0.6129658324860427, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 566.6, "completions/max_terminated_length": 566.6, "completions/mean_length": 188.92958984375, "completions/mean_terminated_length": 188.92958984375, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.544, "grad_norm": 0.0022145204711705446, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 571163001.0, "reward": 1.0449440240859986, "reward_std": 0.0750869557261467, "rewards/accuracy_reward": 0.62900390625, "rewards/brier_reward": 0.8198571324348449, "rewards/confidence_uniqueness_reward": 0.9451698303222656, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.001585884322412312, "rewards/frontier_coverage_1": 0.08185996562242508, "rewards/frontier_coverage_10": 0.08185996562242508, "rewards/frontier_coverage_15": 0.08185996562242508, "rewards/frontier_coverage_20": 0.08185996562242508, "rewards/frontier_coverage_25": 0.0604823037981987, "rewards/frontier_coverage_5": 0.08185996562242508, "rewards/frontier_ece_reward": 0.011854531429708003, "signal/accuracy_reward/centered_abs_mean": 0.102349853515625, "signal/accuracy_reward/group_std_mean": 0.13563383221626282, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0511749267578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0511749267578125, "signal/advantage_abs_mean": 0.05685669779777527, "signal/advantage_pre_scale_abs_mean": 0.05685669779777527, "signal/advantage_pre_scale_std": 0.10516398698091507, "signal/advantage_std": 0.10516398698091507, "signal/brier_reward/centered_abs_mean": 0.11613436192274093, "signal/brier_reward/group_std_mean": 0.14911974966526031, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014516795240342616, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014516795240342616, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02417031079530716, "signal/confidence_uniqueness_reward/group_std_mean": 0.03060316704213619, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003021288849413395, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003021288849413395, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012515761191025376, "signal/frontier_aurc_reward/group_std_mean": 0.001968202483840287, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.240321155113634e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.240321155113634e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14715155959129333, "signal/frontier_coverage_1/group_std_mean": 0.19198558628559112, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_10/centered_abs_mean": 0.14715155959129333, "signal/frontier_coverage_10/group_std_mean": 0.19198558628559112, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_15/centered_abs_mean": 0.14715155959129333, "signal/frontier_coverage_15/group_std_mean": 0.19198558628559112, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_20/centered_abs_mean": 0.14715155959129333, "signal/frontier_coverage_20/group_std_mean": 0.19198558628559112, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_25/centered_abs_mean": 0.09022901803255082, "signal/frontier_coverage_25/group_std_mean": 0.11834533214569092, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016150993760675192, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016150993760675192, "signal/frontier_coverage_5/centered_abs_mean": 0.14715155959129333, "signal/frontier_coverage_5/group_std_mean": 0.19198558628559112, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026340128388255835, "signal/frontier_ece_reward/centered_abs_mean": 0.010609462484717369, "signal/frontier_ece_reward/group_std_mean": 0.01322672814130783, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001326182810589671, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001326182810589671, "step": 170 }, { "calibration/aurc": 0.22587778042468196, "calibration/batch_distribution_entropy": 0.9286465435216072, "calibration/buffer_distribution_entropy": 0.9467257565889398, "calibration/confidence_entropy": 0.4432571774263129, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.42265625, "calibration/coverage@15%": 0.47109375, "calibration/coverage@20%": 0.49453125, "calibration/coverage@25%": 0.65859375, "calibration/coverage@30%": 0.71171875, "calibration/coverage@5%": 0.26875, "calibration/ece": 0.14173538803059738, "calibration/mean_confidence": 0.5470402575486807, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 657.2, "completions/max_terminated_length": 440.8, "completions/mean_length": 186.93251953125, "completions/mean_terminated_length": 186.66925354003905, "completions/min_length": 89.4, "completions/min_terminated_length": 89.4, "epoch": 0.56, "grad_norm": 0.0018579477909952402, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 587898598.0, "reward": 1.0260156869888306, "reward_std": 0.0709751732647419, "rewards/accuracy_reward": 0.57705078125, "rewards/brier_reward": 0.8331053018569946, "rewards/confidence_uniqueness_reward": 0.9434573888778687, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0018543781014159321, "rewards/frontier_coverage_1": 0.13779235035181045, "rewards/frontier_coverage_10": 0.13779235035181045, "rewards/frontier_coverage_15": 0.13779235035181045, "rewards/frontier_coverage_20": 0.13634179830551146, "rewards/frontier_coverage_25": 0.09466939568519592, "rewards/frontier_coverage_5": 0.13779235035181045, "rewards/frontier_ece_reward": 0.012398156523704528, "signal/accuracy_reward/centered_abs_mean": 0.079791259765625, "signal/accuracy_reward/group_std_mean": 0.1126218855381012, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0398956298828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0398956298828125, "signal/advantage_abs_mean": 0.05201718434691429, "signal/advantage_pre_scale_abs_mean": 0.05201718434691429, "signal/advantage_pre_scale_std": 0.09894705563783646, "signal/advantage_std": 0.09894705563783646, "signal/brier_reward/centered_abs_mean": 0.1116187259554863, "signal/brier_reward/group_std_mean": 0.14699228405952453, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013952340744435788, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013952340744435788, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02636619359254837, "signal/confidence_uniqueness_reward/group_std_mean": 0.033408934623003005, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032957741990685464, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032957741990685464, "signal/format_reward/centered_abs_mean": 0.0003662109375, "signal/format_reward/group_std_mean": 0.000768545875325799, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00018310546875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00018310546875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014577839057892561, "signal/frontier_aurc_reward/group_std_mean": 0.0023339309729635714, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6094331042259e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6094331042259e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13344906717538835, "signal/frontier_coverage_1/group_std_mean": 0.17753869891166688, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002388738188892603, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002388738188892603, "signal/frontier_coverage_10/centered_abs_mean": 0.13344906717538835, "signal/frontier_coverage_10/group_std_mean": 0.17753869891166688, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002388738188892603, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002388738188892603, "signal/frontier_coverage_15/centered_abs_mean": 0.13344906717538835, "signal/frontier_coverage_15/group_std_mean": 0.17753869891166688, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002388738188892603, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002388738188892603, "signal/frontier_coverage_20/centered_abs_mean": 0.1315935179591179, "signal/frontier_coverage_20/group_std_mean": 0.1751266449689865, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002355523919686675, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002355523919686675, "signal/frontier_coverage_25/centered_abs_mean": 0.08300138115882874, "signal/frontier_coverage_25/group_std_mean": 0.11093302965164184, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00148572470061481, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00148572470061481, "signal/frontier_coverage_5/centered_abs_mean": 0.13344906717538835, "signal/frontier_coverage_5/group_std_mean": 0.17753869891166688, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002388738188892603, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002388738188892603, "signal/frontier_ece_reward/centered_abs_mean": 0.009788069687783719, "signal/frontier_ece_reward/group_std_mean": 0.012408962100744247, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0012235087109729649, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0012235087109729649, "step": 175 }, { "calibration/aurc": 0.2964656946460727, "calibration/batch_distribution_entropy": 0.9168176730321715, "calibration/buffer_distribution_entropy": 0.9476380754401748, "calibration/confidence_entropy": 0.4257376662970197, "calibration/coverage@0%": 0.11015625, "calibration/coverage@1%": 0.1140625, "calibration/coverage@10%": 0.23984375, "calibration/coverage@15%": 0.32734375, "calibration/coverage@20%": 0.3671875, "calibration/coverage@25%": 0.48046875, "calibration/coverage@30%": 0.578125, "calibration/coverage@5%": 0.165625, "calibration/ece": 0.1658030474792637, "calibration/mean_confidence": 0.5411404430872265, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1305.2, "completions/max_terminated_length": 468.2, "completions/mean_length": 187.0533203125, "completions/mean_terminated_length": 186.39432373046876, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 0.576, "grad_norm": 0.3020451068878174, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 605000648.0, "reward": 1.030449903011322, "reward_std": 0.06246692091226578, "rewards/accuracy_reward": 0.59111328125, "rewards/brier_reward": 0.8259658694267273, "rewards/confidence_uniqueness_reward": 0.9397091507911682, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0018171647796407342, "rewards/frontier_coverage_1": 0.1292146548628807, "rewards/frontier_coverage_10": 0.1292146548628807, "rewards/frontier_coverage_15": 0.1292146548628807, "rewards/frontier_coverage_20": 0.12564596012234688, "rewards/frontier_coverage_25": 0.08896546289324761, "rewards/frontier_coverage_5": 0.1292146548628807, "rewards/frontier_ece_reward": 0.011719273403286934, "signal/accuracy_reward/centered_abs_mean": 0.066680908203125, "signal/accuracy_reward/group_std_mean": 0.09644376039505005, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0333404541015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0333404541015625, "signal/advantage_abs_mean": 0.04487398453056812, "signal/advantage_pre_scale_abs_mean": 0.04487398453056812, "signal/advantage_pre_scale_std": 0.08947417140007019, "signal/advantage_std": 0.08947417140007019, "signal/brier_reward/centered_abs_mean": 0.10441422760486603, "signal/brier_reward/group_std_mean": 0.13690564334392546, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013051778450608254, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013051778450608254, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028500469401478767, "signal/confidence_uniqueness_reward/group_std_mean": 0.03677135743200779, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003562558675184846, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003562558675184846, "signal/format_reward/centered_abs_mean": 0.001300048828125, "signal/format_reward/group_std_mean": 0.0031943732406944036, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012849176069721579, "signal/frontier_aurc_reward/group_std_mean": 0.001977930567227304, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3000024521024896e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3000024521024896e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13109023869037628, "signal/frontier_coverage_1/group_std_mean": 0.1723720222711563, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023465151432901623, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023465151432901623, "signal/frontier_coverage_10/centered_abs_mean": 0.13109023869037628, "signal/frontier_coverage_10/group_std_mean": 0.1723720222711563, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023465151432901623, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023465151432901623, "signal/frontier_coverage_15/centered_abs_mean": 0.13109023869037628, "signal/frontier_coverage_15/group_std_mean": 0.1723720222711563, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023465151432901623, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023465151432901623, "signal/frontier_coverage_20/centered_abs_mean": 0.1257859319448471, "signal/frontier_coverage_20/group_std_mean": 0.16545325815677642, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022515680640935896, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022515680640935896, "signal/frontier_coverage_25/centered_abs_mean": 0.08115749582648277, "signal/frontier_coverage_25/group_std_mean": 0.10648612678050995, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014527191407978535, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014527191407978535, "signal/frontier_coverage_5/centered_abs_mean": 0.13109023869037628, "signal/frontier_coverage_5/group_std_mean": 0.1723720222711563, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023465151432901623, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023465151432901623, "signal/frontier_ece_reward/centered_abs_mean": 0.00884333048015833, "signal/frontier_ece_reward/group_std_mean": 0.011235564388334751, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011054163100197912, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011054163100197912, "step": 180 }, { "calibration/aurc": 0.3515624446654767, "calibration/batch_distribution_entropy": 0.8888446214443768, "calibration/buffer_distribution_entropy": 0.9482682908968807, "calibration/confidence_entropy": 0.4005931618908181, "calibration/coverage@0%": 0.07814644607843138, "calibration/coverage@1%": 0.07814644607843138, "calibration/coverage@10%": 0.252469362745098, "calibration/coverage@15%": 0.3150183823529412, "calibration/coverage@20%": 0.3674172794117647, "calibration/coverage@25%": 0.41276654411764707, "calibration/coverage@30%": 0.5190686274509804, "calibration/coverage@5%": 0.15006740196078433, "calibration/ece": 0.12824287845261145, "calibration/mean_confidence": 0.5009771199979778, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1110.8, "completions/max_terminated_length": 451.4, "completions/mean_length": 186.34189453125, "completions/mean_terminated_length": 185.94619750976562, "completions/min_length": 85.4, "completions/min_terminated_length": 85.4, "epoch": 0.592, "grad_norm": 0.0023227103520184755, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 622076501.0, "reward": 1.0253287315368653, "reward_std": 0.07493966221809387, "rewards/accuracy_reward": 0.584375, "rewards/brier_reward": 0.8193248152732849, "rewards/confidence_uniqueness_reward": 0.9278930783271789, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0017773719038814307, "rewards/frontier_coverage_1": 0.1330685704946518, "rewards/frontier_coverage_10": 0.1330685704946518, "rewards/frontier_coverage_15": 0.1330685704946518, "rewards/frontier_coverage_20": 0.1282924994826317, "rewards/frontier_coverage_25": 0.0903812974691391, "rewards/frontier_coverage_5": 0.1330685704946518, "rewards/frontier_ece_reward": 0.011802474223077297, "signal/accuracy_reward/centered_abs_mean": 0.09986572265625, "signal/accuracy_reward/group_std_mean": 0.1311745300889015, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049932861328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049932861328125, "signal/advantage_abs_mean": 0.05721670612692833, "signal/advantage_pre_scale_abs_mean": 0.05721670612692833, "signal/advantage_pre_scale_std": 0.10559385418891906, "signal/advantage_std": 0.10559385418891906, "signal/brier_reward/centered_abs_mean": 0.11419818848371506, "signal/brier_reward/group_std_mean": 0.14777041971683502, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014274773560464383, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014274773560464383, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03568760454654694, "signal/confidence_uniqueness_reward/group_std_mean": 0.04578934088349342, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004460950568318367, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004460950568318367, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014281244948506355, "signal/frontier_aurc_reward/group_std_mean": 0.0022319577634334563, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.55634276982164e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.55634276982164e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14765068590641023, "signal/frontier_coverage_1/group_std_mean": 0.1923435479402542, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026429472491145134, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026429472491145134, "signal/frontier_coverage_10/centered_abs_mean": 0.14765068590641023, "signal/frontier_coverage_10/group_std_mean": 0.1923435479402542, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026429472491145134, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026429472491145134, "signal/frontier_coverage_15/centered_abs_mean": 0.14765068590641023, "signal/frontier_coverage_15/group_std_mean": 0.1923435479402542, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026429472491145134, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026429472491145134, "signal/frontier_coverage_20/centered_abs_mean": 0.13963269293308259, "signal/frontier_coverage_20/group_std_mean": 0.18203844726085663, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002499425271525979, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002499425271525979, "signal/frontier_coverage_25/centered_abs_mean": 0.08843920975923539, "signal/frontier_coverage_25/group_std_mean": 0.11563192903995514, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015830618096515537, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015830618096515537, "signal/frontier_coverage_5/centered_abs_mean": 0.14765068590641023, "signal/frontier_coverage_5/group_std_mean": 0.1923435479402542, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026429472491145134, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026429472491145134, "signal/frontier_ece_reward/centered_abs_mean": 0.008998825587332249, "signal/frontier_ece_reward/group_std_mean": 0.011416062340140342, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011248531984165311, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011248531984165311, "step": 185 }, { "calibration/aurc": 0.19760200424062405, "calibration/batch_distribution_entropy": 0.843288138266779, "calibration/buffer_distribution_entropy": 0.9480241421807527, "calibration/confidence_entropy": 0.3636719512707667, "calibration/coverage@0%": 0.2765625, "calibration/coverage@1%": 0.2984375, "calibration/coverage@10%": 0.5328125, "calibration/coverage@15%": 0.6515625, "calibration/coverage@20%": 0.70703125, "calibration/coverage@25%": 0.75, "calibration/coverage@30%": 0.78125, "calibration/coverage@5%": 0.44375, "calibration/ece": 0.16141309983740632, "calibration/mean_confidence": 0.5030459626625937, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 570.6, "completions/max_terminated_length": 570.6, "completions/mean_length": 187.1, "completions/mean_terminated_length": 187.1, "completions/min_length": 89.4, "completions/min_terminated_length": 89.4, "epoch": 0.608, "grad_norm": 0.0017206113552674651, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 638991893.0, "reward": 1.0279302835464477, "reward_std": 0.05821175277233124, "rewards/accuracy_reward": 0.5734375, "rewards/brier_reward": 0.8467367172241211, "rewards/confidence_uniqueness_reward": 0.9279510498046875, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.001405553543008864, "rewards/frontier_coverage_1": 0.17639144659042358, "rewards/frontier_coverage_10": 0.17639144659042358, "rewards/frontier_coverage_15": 0.17639144659042358, "rewards/frontier_coverage_20": 0.17281466871500015, "rewards/frontier_coverage_25": 0.11612609624862671, "rewards/frontier_coverage_5": 0.17639144659042358, "rewards/frontier_ece_reward": 0.012792413122951984, "signal/accuracy_reward/centered_abs_mean": 0.081982421875, "signal/accuracy_reward/group_std_mean": 0.10979770123958588, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409912109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0409912109375, "signal/advantage_abs_mean": 0.043833667784929274, "signal/advantage_pre_scale_abs_mean": 0.043833667784929274, "signal/advantage_pre_scale_std": 0.08639424741268158, "signal/advantage_std": 0.08639424741268158, "signal/brier_reward/centered_abs_mean": 0.10321188867092132, "signal/brier_reward/group_std_mean": 0.1343301758170128, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012901486083865165, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012901486083865165, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03328895568847656, "signal/confidence_uniqueness_reward/group_std_mean": 0.0417523019015789, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00416111946105957, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00416111946105957, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0010029817116446794, "signal/frontier_aurc_reward/group_std_mean": 0.0015215349150821567, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.7953372116608078e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.7953372116608078e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14954022765159608, "signal/frontier_coverage_1/group_std_mean": 0.19398094117641448, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002676770044490695, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002676770044490695, "signal/frontier_coverage_10/centered_abs_mean": 0.14954022765159608, "signal/frontier_coverage_10/group_std_mean": 0.19398094117641448, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002676770044490695, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002676770044490695, "signal/frontier_coverage_15/centered_abs_mean": 0.14954022765159608, "signal/frontier_coverage_15/group_std_mean": 0.19398094117641448, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002676770044490695, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002676770044490695, "signal/frontier_coverage_20/centered_abs_mean": 0.14172202944755555, "signal/frontier_coverage_20/group_std_mean": 0.18404050469398497, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002536824205890298, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002536824205890298, "signal/frontier_coverage_25/centered_abs_mean": 0.08968007564544678, "signal/frontier_coverage_25/group_std_mean": 0.11650702059268951, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016052733408287168, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016052733408287168, "signal/frontier_coverage_5/centered_abs_mean": 0.14954022765159608, "signal/frontier_coverage_5/group_std_mean": 0.19398094117641448, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002676770044490695, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002676770044490695, "signal/frontier_ece_reward/centered_abs_mean": 0.008176222257316113, "signal/frontier_ece_reward/group_std_mean": 0.010366989858448505, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010220277821645142, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010220277821645142, "step": 190 }, { "calibration/aurc": 0.16803935733946962, "calibration/batch_distribution_entropy": 0.8950542863389088, "calibration/buffer_distribution_entropy": 0.9478421140046102, "calibration/confidence_entropy": 0.4051885790306386, "calibration/coverage@0%": 0.16640625, "calibration/coverage@1%": 0.17109375, "calibration/coverage@10%": 0.540625, "calibration/coverage@15%": 0.609375, "calibration/coverage@20%": 0.67265625, "calibration/coverage@25%": 0.71484375, "calibration/coverage@30%": 0.75390625, "calibration/coverage@5%": 0.41953125, "calibration/ece": 0.11327187991929444, "calibration/mean_confidence": 0.5317124950807055, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 713.6, "completions/max_terminated_length": 501.8, "completions/mean_length": 188.14892578125, "completions/mean_terminated_length": 188.01675415039062, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.624, "grad_norm": 0.0019219900714233518, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 656262442.0, "reward": 1.0314565539360045, "reward_std": 0.06776490807533264, "rewards/accuracy_reward": 0.58193359375, "rewards/brier_reward": 0.8456698775291442, "rewards/confidence_uniqueness_reward": 0.93500657081604, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0015101159922778606, "rewards/frontier_coverage_1": 0.16571835279464722, "rewards/frontier_coverage_10": 0.16571835279464722, "rewards/frontier_coverage_15": 0.16571835279464722, "rewards/frontier_coverage_20": 0.15580750107765198, "rewards/frontier_coverage_25": 0.10798413306474686, "rewards/frontier_coverage_5": 0.16571835279464722, "rewards/frontier_ece_reward": 0.011540688015520573, "signal/accuracy_reward/centered_abs_mean": 0.094268798828125, "signal/accuracy_reward/group_std_mean": 0.12409499287605286, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0471343994140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0471343994140625, "signal/advantage_abs_mean": 0.05176782011985779, "signal/advantage_pre_scale_abs_mean": 0.05176782011985779, "signal/advantage_pre_scale_std": 0.09750174582004548, "signal/advantage_std": 0.09750174582004548, "signal/brier_reward/centered_abs_mean": 0.10846467316150665, "signal/brier_reward/group_std_mean": 0.14180308282375337, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013558084145188332, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013558084145188332, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030897776782512664, "signal/confidence_uniqueness_reward/group_std_mean": 0.03884159214794636, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003862222097814083, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003862222097814083, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011383457691408693, "signal/frontier_aurc_reward/group_std_mean": 0.0017884798115119338, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.03763887839159e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.03763887839159e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14944371283054353, "signal/frontier_coverage_1/group_std_mean": 0.19577408730983734, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002675042301416397, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002675042301416397, "signal/frontier_coverage_10/centered_abs_mean": 0.14944371283054353, "signal/frontier_coverage_10/group_std_mean": 0.19577408730983734, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002675042301416397, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002675042301416397, "signal/frontier_coverage_15/centered_abs_mean": 0.14944371283054353, "signal/frontier_coverage_15/group_std_mean": 0.19577408730983734, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002675042301416397, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002675042301416397, "signal/frontier_coverage_20/centered_abs_mean": 0.1349347472190857, "signal/frontier_coverage_20/group_std_mean": 0.17690467536449433, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002415331965312362, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002415331965312362, "signal/frontier_coverage_25/centered_abs_mean": 0.08353340923786164, "signal/frontier_coverage_25/group_std_mean": 0.10984267294406891, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014952480327337981, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014952480327337981, "signal/frontier_coverage_5/centered_abs_mean": 0.14944371283054353, "signal/frontier_coverage_5/group_std_mean": 0.19577408730983734, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002675042301416397, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002675042301416397, "signal/frontier_ece_reward/centered_abs_mean": 0.007968425843864679, "signal/frontier_ece_reward/group_std_mean": 0.010179330036044121, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009960532304830849, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009960532304830849, "step": 195 }, { "calibration/aurc": 0.20889160874971036, "calibration/batch_distribution_entropy": 0.9089865199203135, "calibration/buffer_distribution_entropy": 0.9479906632560786, "calibration/confidence_entropy": 0.4287082953337954, "calibration/coverage@0%": 0.09921875, "calibration/coverage@1%": 0.11796875, "calibration/coverage@10%": 0.40703125, "calibration/coverage@15%": 0.46484375, "calibration/coverage@20%": 0.63671875, "calibration/coverage@25%": 0.6953125, "calibration/coverage@30%": 0.75078125, "calibration/coverage@5%": 0.3515625, "calibration/ece": 0.1674316742601562, "calibration/mean_confidence": 0.5789995757398437, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 869.4, "completions/max_terminated_length": 445.4, "completions/mean_length": 196.52490234375, "completions/mean_terminated_length": 196.00135192871093, "completions/min_length": 96.2, "completions/min_terminated_length": 96.2, "epoch": 0.64, "grad_norm": 0.0018030045321211219, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 673617545.0, "reward": 1.0461254596710206, "reward_std": 0.057253798097372056, "rewards/accuracy_reward": 0.6255859375, "rewards/brier_reward": 0.8291385531425476, "rewards/confidence_uniqueness_reward": 0.9392925262451172, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.001513039879500866, "rewards/frontier_coverage_1": 0.11257308274507523, "rewards/frontier_coverage_10": 0.11257308274507523, "rewards/frontier_coverage_15": 0.11257308274507523, "rewards/frontier_coverage_20": 0.10006719529628753, "rewards/frontier_coverage_25": 0.07489581555128097, "rewards/frontier_coverage_5": 0.11257308274507523, "rewards/frontier_ece_reward": 0.010471446067094803, "signal/accuracy_reward/centered_abs_mean": 0.06611328125, "signal/accuracy_reward/group_std_mean": 0.09317785650491714, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.033056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.033056640625, "signal/advantage_abs_mean": 0.04163134917616844, "signal/advantage_pre_scale_abs_mean": 0.04163134917616844, "signal/advantage_pre_scale_std": 0.0858407735824585, "signal/advantage_std": 0.0858407735824585, "signal/brier_reward/centered_abs_mean": 0.09937669783830642, "signal/brier_reward/group_std_mean": 0.13064824044704437, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012422087229788303, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012422087229788303, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02863082177937031, "signal/confidence_uniqueness_reward/group_std_mean": 0.03645128607749939, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035788527224212886, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035788527224212886, "signal/format_reward/centered_abs_mean": 0.00072021484375, "signal/format_reward/group_std_mean": 0.0014778789598494768, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000360107421875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000360107421875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011738982051610948, "signal/frontier_aurc_reward/group_std_mean": 0.0018677733605727553, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1012776051065886e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1012776051065886e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12463102638721466, "signal/frontier_coverage_1/group_std_mean": 0.16538253724575042, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022308954037725927, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022308954037725927, "signal/frontier_coverage_10/centered_abs_mean": 0.12463102638721466, "signal/frontier_coverage_10/group_std_mean": 0.16538253724575042, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022308954037725927, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022308954037725927, "signal/frontier_coverage_15/centered_abs_mean": 0.12463102638721466, "signal/frontier_coverage_15/group_std_mean": 0.16538253724575042, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022308954037725927, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022308954037725927, "signal/frontier_coverage_20/centered_abs_mean": 0.10897718667984009, "signal/frontier_coverage_20/group_std_mean": 0.1449252337217331, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019506915938109159, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019506915938109159, "signal/frontier_coverage_25/centered_abs_mean": 0.06840595453977585, "signal/frontier_coverage_25/group_std_mean": 0.0906538799405098, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001224466529674828, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001224466529674828, "signal/frontier_coverage_5/centered_abs_mean": 0.12463102638721466, "signal/frontier_coverage_5/group_std_mean": 0.16538253724575042, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022308954037725927, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022308954037725927, "signal/frontier_ece_reward/centered_abs_mean": 0.007416488416492939, "signal/frontier_ece_reward/group_std_mean": 0.009534438140690327, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009270610520616174, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009270610520616174, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.3420038089344068, "eval_calibration/batch_distribution_entropy": 0.9040885318071357, "eval_calibration/buffer_distribution_entropy": 0.9499923798968546, "eval_calibration/confidence_entropy": 0.43239845984155695, "eval_calibration/coverage@0%": 0.125, "eval_calibration/coverage@1%": 0.125, "eval_calibration/coverage@10%": 0.203125, "eval_calibration/coverage@15%": 0.203125, "eval_calibration/coverage@20%": 0.28125, "eval_calibration/coverage@25%": 0.359375, "eval_calibration/coverage@30%": 0.390625, "eval_calibration/coverage@5%": 0.125, "eval_calibration/ece": 0.13722656249999998, "eval_calibration/mean_confidence": 0.4819140625, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 964.5, "eval_completions/max_terminated_length": 382.0, "eval_completions/mean_length": 200.27852630615234, "eval_completions/mean_terminated_length": 197.66400146484375, "eval_completions/min_length": 107.0, "eval_completions/min_terminated_length": 107.0, "eval_loss": 0.0, "eval_num_tokens": 673617545.0, "eval_reward": 0.9530201256275177, "eval_reward_std": 0.23500938713550568, "eval_rewards/accuracy_reward": 0.439453125, "eval_rewards/brier_reward": 0.8035316169261932, "eval_rewards/confidence_uniqueness_reward": 0.889798104763031, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_aurc_reward": -0.002739873481914401, "eval_rewards/frontier_coverage_1": 0.22667521983385086, "eval_rewards/frontier_coverage_10": 0.22667521983385086, "eval_rewards/frontier_coverage_15": 0.22667521983385086, "eval_rewards/frontier_coverage_20": 0.1793447956442833, "eval_rewards/frontier_coverage_25": 0.11752147227525711, "eval_rewards/frontier_coverage_5": 0.22667521983385086, "eval_rewards/frontier_ece_reward": 0.008872916921973228, "eval_runtime": 19.7075, "eval_samples_per_second": 25.371, "eval_signal/accuracy_reward/centered_abs_mean": 0.4754638671875, "eval_signal/accuracy_reward/group_std_mean": 0.49497611820697784, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23773193359375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23773193359375, "eval_signal/advantage_abs_mean": 0.21505261212587357, "eval_signal/advantage_pre_scale_abs_mean": 0.21505261212587357, "eval_signal/advantage_pre_scale_std": 0.2322075515985489, "eval_signal/advantage_std": 0.2322075515985489, "eval_signal/brier_reward/centered_abs_mean": 0.22212185710668564, "eval_signal/brier_reward/group_std_mean": 0.27625299990177155, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027765232138335705, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.027765232138335705, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04621247202157974, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.059811294078826904, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005776559002697468, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005776559002697468, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003157320083118975, "eval_signal/frontier_aurc_reward/group_std_mean": 0.005664329044520855, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.6516028053010814e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.6516028053010814e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.382648304104805, "eval_signal/frontier_coverage_1/group_std_mean": 0.46173766255378723, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0068494039587676525, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0068494039587676525, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.382648304104805, "eval_signal/frontier_coverage_10/group_std_mean": 0.46173766255378723, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0068494039587676525, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0068494039587676525, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.382648304104805, "eval_signal/frontier_coverage_15/group_std_mean": 0.46173766255378723, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0068494039587676525, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0068494039587676525, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2975280433893204, "eval_signal/frontier_coverage_20/group_std_mean": 0.36239591240882874, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005325751379132271, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005325751379132271, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.1779879480600357, "eval_signal/frontier_coverage_25/group_std_mean": 0.22136619687080383, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031859842129051685, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031859842129051685, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.382648304104805, "eval_signal/frontier_coverage_5/group_std_mean": 0.46173766255378723, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0068494039587676525, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0068494039587676525, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.013139450456947088, "eval_signal/frontier_ece_reward/group_std_mean": 0.015696686692535877, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001642431307118386, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001642431307118386, "eval_steps_per_second": 0.101, "step": 200 }, { "epoch": 0.64, "step": 200, "train_probe_calibration/aurc": 0.14380049777966852, "train_probe_calibration/batch_distribution_entropy": 0.8924841198751866, "train_probe_calibration/buffer_distribution_entropy": 0.9501962946765351, "train_probe_calibration/confidence_entropy": 0.4064653950766257, "train_probe_calibration/coverage@0%": 0.0625, "train_probe_calibration/coverage@1%": 0.0625, "train_probe_calibration/coverage@10%": 0.6875, "train_probe_calibration/coverage@15%": 0.796875, "train_probe_calibration/coverage@20%": 0.875, "train_probe_calibration/coverage@25%": 0.921875, "train_probe_calibration/coverage@30%": 0.96875, "train_probe_calibration/coverage@5%": 0.0625, "train_probe_calibration/ece": 0.21375468749999998, "train_probe_calibration/mean_confidence": 0.5793390625, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 316.5, "train_probe_completions/max_terminated_length": 316.5, "train_probe_completions/mean_length": 195.48914337158203, "train_probe_completions/mean_terminated_length": 195.48914337158203, "train_probe_completions/min_length": 101.0, "train_probe_completions/min_terminated_length": 101.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 673617545.0, "train_probe_reward": 1.0627794861793518, "train_probe_reward_std": 0.2138308882713318, "train_probe_rewards/accuracy_reward": 0.66796875, "train_probe_rewards/brier_reward": 0.8411527872085571, "train_probe_rewards/confidence_uniqueness_reward": 0.90185546875, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0011366689577698708, "train_probe_rewards/frontier_coverage_1": 0.09746142104268074, "train_probe_rewards/frontier_coverage_10": 0.09746142104268074, "train_probe_rewards/frontier_coverage_15": 0.09746142104268074, "train_probe_rewards/frontier_coverage_20": 0.08396613597869873, "train_probe_rewards/frontier_coverage_25": 0.06902317516505718, "train_probe_rewards/frontier_coverage_5": 0.09746142104268074, "train_probe_rewards/frontier_ece_reward": 0.009781356435269117, "train_probe_runtime": 9.0902, "train_probe_samples_per_second": 55.004, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.434326171875, "train_probe_signal/accuracy_reward/group_std_mean": 0.4729345738887787, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2171630859375, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2171630859375, "train_probe_signal/advantage_abs_mean": 0.19102784246206284, "train_probe_signal/advantage_pre_scale_abs_mean": 0.19102784246206284, "train_probe_signal/advantage_pre_scale_std": 0.21111667901277542, "train_probe_signal/advantage_std": 0.21111667901277542, "train_probe_signal/brier_reward/centered_abs_mean": 0.1861182525753975, "train_probe_signal/brier_reward/group_std_mean": 0.24657447636127472, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023264781571924686, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.023264781571924686, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.039306640625, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.046158455312252045, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004913330078125, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004913330078125, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0019272951176390052, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0035583705175668, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.449858195381239e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.449858195381239e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3332698345184326, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.45197173953056335, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005965529475361109, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005965529475361109, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3332698345184326, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.45197173953056335, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005965529475361109, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005965529475361109, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3332698345184326, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.45197173953056335, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005965529475361109, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005965529475361109, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.256347618997097, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.35391244292259216, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0045886223670095205, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0045886223670095205, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.1468118354678154, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.20914901793003082, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026279317680746317, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026279317680746317, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3332698345184326, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.45197173953056335, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005965529475361109, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005965529475361109, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.012552765663713217, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.014878344256430864, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001569095707964152, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001569095707964152, "train_probe_steps_per_second": 0.22 }, { "calibration/aurc": 0.24817290495089597, "calibration/batch_distribution_entropy": 0.9204595927573562, "calibration/buffer_distribution_entropy": 0.9519832870818661, "calibration/confidence_entropy": 0.4577226375814408, "calibration/coverage@0%": 0.025, "calibration/coverage@1%": 0.1078125, "calibration/coverage@10%": 0.3125, "calibration/coverage@15%": 0.35234375, "calibration/coverage@20%": 0.4484375, "calibration/coverage@25%": 0.49609375, "calibration/coverage@30%": 0.6015625, "calibration/coverage@5%": 0.240625, "calibration/ece": 0.1720056808846374, "calibration/mean_confidence": 0.5100622878653626, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 656.8, "completions/max_terminated_length": 496.6, "completions/mean_length": 199.54921875, "completions/mean_terminated_length": 199.4187744140625, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.656, "grad_norm": 0.0016656734514981508, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 690517473.0, "reward": 1.0183162331581115, "reward_std": 0.06802579239010811, "rewards/accuracy_reward": 0.57275390625, "rewards/brier_reward": 0.8095171332359314, "rewards/confidence_uniqueness_reward": 0.94515380859375, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0018861858639866113, "rewards/frontier_coverage_1": 0.12252766788005828, "rewards/frontier_coverage_10": 0.12252766788005828, "rewards/frontier_coverage_15": 0.12252766788005828, "rewards/frontier_coverage_20": 0.09871871173381805, "rewards/frontier_coverage_25": 0.0694797769188881, "rewards/frontier_coverage_5": 0.12252766788005828, "rewards/frontier_ece_reward": 0.007234203815460205, "signal/accuracy_reward/centered_abs_mean": 0.083831787109375, "signal/accuracy_reward/group_std_mean": 0.11424745023250579, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0419158935546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0419158935546875, "signal/advantage_abs_mean": 0.05140817314386368, "signal/advantage_pre_scale_abs_mean": 0.05140817314386368, "signal/advantage_pre_scale_std": 0.09806035608053207, "signal/advantage_std": 0.09806035608053207, "signal/brier_reward/centered_abs_mean": 0.1077189490199089, "signal/brier_reward/group_std_mean": 0.13838136196136475, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013464868627488613, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013464868627488613, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421807125210762, "signal/confidence_uniqueness_reward/group_std_mean": 0.030718856677412986, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030272589065134525, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030272589065134525, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013030647998675704, "signal/frontier_aurc_reward/group_std_mean": 0.0020229590591043234, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3324858921114357e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3324858921114357e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13521387726068496, "signal/frontier_coverage_1/group_std_mean": 0.17738903760910035, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002420328464359045, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002420328464359045, "signal/frontier_coverage_10/centered_abs_mean": 0.13521387726068496, "signal/frontier_coverage_10/group_std_mean": 0.17738903760910035, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002420328464359045, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002420328464359045, "signal/frontier_coverage_15/centered_abs_mean": 0.13521387726068496, "signal/frontier_coverage_15/group_std_mean": 0.17738903760910035, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002420328464359045, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002420328464359045, "signal/frontier_coverage_20/centered_abs_mean": 0.09841903001070022, "signal/frontier_coverage_20/group_std_mean": 0.129475200176239, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017617005854845048, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017617005854845048, "signal/frontier_coverage_25/centered_abs_mean": 0.06278965771198272, "signal/frontier_coverage_25/group_std_mean": 0.08188406601548195, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011239348677918315, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011239348677918315, "signal/frontier_coverage_5/centered_abs_mean": 0.13521387726068496, "signal/frontier_coverage_5/group_std_mean": 0.17738903760910035, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002420328464359045, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002420328464359045, "signal/frontier_ece_reward/centered_abs_mean": 0.006564310565590858, "signal/frontier_ece_reward/group_std_mean": 0.008403288014233113, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008205388206988573, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008205388206988573, "step": 205 }, { "calibration/aurc": 0.23136369662906525, "calibration/batch_distribution_entropy": 0.8958516870167121, "calibration/buffer_distribution_entropy": 0.9554491916207283, "calibration/confidence_entropy": 0.43047974707442044, "calibration/coverage@0%": 0.07734375, "calibration/coverage@1%": 0.07734375, "calibration/coverage@10%": 0.31953125, "calibration/coverage@15%": 0.3921875, "calibration/coverage@20%": 0.44375, "calibration/coverage@25%": 0.5265625, "calibration/coverage@30%": 0.60625, "calibration/coverage@5%": 0.23203125, "calibration/ece": 0.1621687781762295, "calibration/mean_confidence": 0.5560812218237705, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 436.2, "completions/max_terminated_length": 436.2, "completions/mean_length": 204.09091796875, "completions/mean_terminated_length": 204.09091796875, "completions/min_length": 102.6, "completions/min_terminated_length": 102.6, "epoch": 0.672, "grad_norm": 0.0015550514217466116, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 707520804.0, "reward": 1.026635193824768, "reward_std": 0.06110656931996346, "rewards/accuracy_reward": 0.5787109375, "rewards/brier_reward": 0.8387270212173462, "rewards/confidence_uniqueness_reward": 0.9363265991210937, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.00141967604868114, "rewards/frontier_coverage_1": 0.15640246868133545, "rewards/frontier_coverage_10": 0.15640246868133545, "rewards/frontier_coverage_15": 0.1499548703432083, "rewards/frontier_coverage_20": 0.10669813752174377, "rewards/frontier_coverage_25": 0.08252269625663758, "rewards/frontier_coverage_5": 0.15640246868133545, "rewards/frontier_ece_reward": 0.0076270273886621, "signal/accuracy_reward/centered_abs_mean": 0.0878173828125, "signal/accuracy_reward/group_std_mean": 0.1166835829615593, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04390869140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04390869140625, "signal/advantage_abs_mean": 0.04624823108315468, "signal/advantage_pre_scale_abs_mean": 0.04624823108315468, "signal/advantage_pre_scale_std": 0.09073985815048217, "signal/advantage_std": 0.09073985815048217, "signal/brier_reward/centered_abs_mean": 0.1011570304632187, "signal/brier_reward/group_std_mean": 0.13063574135303496, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012644628807902337, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012644628807902337, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028185939788818358, "signal/confidence_uniqueness_reward/group_std_mean": 0.03486784622073173, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035232424736022947, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035232424736022947, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.00101193260634318, "signal/frontier_aurc_reward/group_std_mean": 0.0015801386674866081, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.8113592523150147e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.8113592523150147e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14319152235984803, "signal/frontier_coverage_1/group_std_mean": 0.18724198639392853, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025631281081587077, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025631281081587077, "signal/frontier_coverage_10/centered_abs_mean": 0.14319152235984803, "signal/frontier_coverage_10/group_std_mean": 0.18724198639392853, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025631281081587077, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025631281081587077, "signal/frontier_coverage_15/centered_abs_mean": 0.13852950036525727, "signal/frontier_coverage_15/group_std_mean": 0.18155628740787505, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024796778801828624, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024796778801828624, "signal/frontier_coverage_20/centered_abs_mean": 0.08701228499412536, "signal/frontier_coverage_20/group_std_mean": 0.11472053080797195, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00155751989223063, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00155751989223063, "signal/frontier_coverage_25/centered_abs_mean": 0.05806139260530472, "signal/frontier_coverage_25/group_std_mean": 0.07511216998100281, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010392988799139858, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010392988799139858, "signal/frontier_coverage_5/centered_abs_mean": 0.14319152235984803, "signal/frontier_coverage_5/group_std_mean": 0.18724198639392853, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025631281081587077, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025631281081587077, "signal/frontier_ece_reward/centered_abs_mean": 0.005760752130299807, "signal/frontier_ece_reward/group_std_mean": 0.007287882454693318, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007200940162874758, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007200940162874758, "step": 210 }, { "calibration/aurc": 0.2169351861265752, "calibration/batch_distribution_entropy": 0.906085427880875, "calibration/buffer_distribution_entropy": 0.95874229714237, "calibration/confidence_entropy": 0.4680126493425715, "calibration/coverage@0%": 0.09546875, "calibration/coverage@1%": 0.09546875, "calibration/coverage@10%": 0.4090379901960784, "calibration/coverage@15%": 0.5012714460784313, "calibration/coverage@20%": 0.5692738970588235, "calibration/coverage@25%": 0.6333884803921569, "calibration/coverage@30%": 0.7037837009803922, "calibration/coverage@5%": 0.2745772058823529, "calibration/ece": 0.1512060885389182, "calibration/mean_confidence": 0.5770298367061799, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 668.2, "completions/max_terminated_length": 486.8, "completions/mean_length": 212.39873046875, "completions/mean_terminated_length": 212.26973571777344, "completions/min_length": 104.2, "completions/min_terminated_length": 104.2, "epoch": 0.688, "grad_norm": 0.001888699596747756, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 724649687.0, "reward": 1.0408958196640015, "reward_std": 0.06385916396975518, "rewards/accuracy_reward": 0.61318359375, "rewards/brier_reward": 0.8376299142837524, "rewards/confidence_uniqueness_reward": 0.9420908451080322, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.001473946124315262, "rewards/frontier_coverage_1": 0.12142772302031517, "rewards/frontier_coverage_10": 0.12142772302031517, "rewards/frontier_coverage_15": 0.10840724855661392, "rewards/frontier_coverage_20": 0.07632499039173127, "rewards/frontier_coverage_25": 0.0743546724319458, "rewards/frontier_coverage_5": 0.12142772302031517, "rewards/frontier_ece_reward": 0.0060465382412076, "signal/accuracy_reward/centered_abs_mean": 0.085443115234375, "signal/accuracy_reward/group_std_mean": 0.11665472537279128, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0427215576171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0427215576171875, "signal/advantage_abs_mean": 0.04668809846043587, "signal/advantage_pre_scale_abs_mean": 0.04668809846043587, "signal/advantage_pre_scale_std": 0.09376905411481858, "signal/advantage_std": 0.09376905411481858, "signal/brier_reward/centered_abs_mean": 0.09790285527706147, "signal/brier_reward/group_std_mean": 0.1284557342529297, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012237856909632683, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012237856909632683, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02564612701535225, "signal/confidence_uniqueness_reward/group_std_mean": 0.03227175809442997, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003205765876919031, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003205765876919031, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011468618642538786, "signal/frontier_aurc_reward/group_std_mean": 0.0019072068389505148, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0528827008092776e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0528827008092776e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13615999221801758, "signal/frontier_coverage_1/group_std_mean": 0.1743500828742981, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024372637271881104, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024372637271881104, "signal/frontier_coverage_10/centered_abs_mean": 0.13615999221801758, "signal/frontier_coverage_10/group_std_mean": 0.1743500828742981, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024372637271881104, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024372637271881104, "signal/frontier_coverage_15/centered_abs_mean": 0.11814617216587067, "signal/frontier_coverage_15/group_std_mean": 0.15128694474697113, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002114816382527351, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002114816382527351, "signal/frontier_coverage_20/centered_abs_mean": 0.07219749391078949, "signal/frontier_coverage_20/group_std_mean": 0.09231588244438171, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012923351023346186, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012923351023346186, "signal/frontier_coverage_25/centered_abs_mean": 0.05291619151830673, "signal/frontier_coverage_25/group_std_mean": 0.06708001494407653, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009471998200751841, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009471998200751841, "signal/frontier_coverage_5/centered_abs_mean": 0.13615999221801758, "signal/frontier_coverage_5/group_std_mean": 0.1743500828742981, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024372637271881104, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024372637271881104, "signal/frontier_ece_reward/centered_abs_mean": 0.004961969796568155, "signal/frontier_ece_reward/group_std_mean": 0.00630278754979372, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006202462245710194, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006202462245710194, "step": 215 }, { "calibration/aurc": 0.11948550052681228, "calibration/batch_distribution_entropy": 0.7879994954219934, "calibration/buffer_distribution_entropy": 0.9586850802270618, "calibration/confidence_entropy": 0.3649859275848222, "calibration/coverage@0%": 0.265625, "calibration/coverage@1%": 0.26796875, "calibration/coverage@10%": 0.496875, "calibration/coverage@15%": 0.634375, "calibration/coverage@20%": 0.78203125, "calibration/coverage@25%": 0.83515625, "calibration/coverage@30%": 0.8984375, "calibration/coverage@5%": 0.3890625, "calibration/ece": 0.13410406490920107, "calibration/mean_confidence": 0.6672344767574656, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 652.2, "completions/max_terminated_length": 434.8, "completions/mean_length": 211.9447265625, "completions/mean_terminated_length": 211.81568603515626, "completions/min_length": 103.6, "completions/min_terminated_length": 103.6, "epoch": 0.704, "grad_norm": 0.0016022155759856105, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 741686145.0, "reward": 1.0412675619125367, "reward_std": 0.060601814091205596, "rewards/accuracy_reward": 0.61259765625, "rewards/brier_reward": 0.8442665100097656, "rewards/confidence_uniqueness_reward": 0.937886118888855, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0017726486083120107, "rewards/frontier_coverage_1": 0.12525941878557206, "rewards/frontier_coverage_10": 0.12525941878557206, "rewards/frontier_coverage_15": 0.10406550467014312, "rewards/frontier_coverage_20": 0.07677052170038223, "rewards/frontier_coverage_25": 0.09108839333057403, "rewards/frontier_coverage_5": 0.12525941878557206, "rewards/frontier_ece_reward": 0.005490910448133946, "signal/accuracy_reward/centered_abs_mean": 0.074908447265625, "signal/accuracy_reward/group_std_mean": 0.09916009157896041, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0374542236328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0374542236328125, "signal/advantage_abs_mean": 0.04598864167928696, "signal/advantage_pre_scale_abs_mean": 0.04598864167928696, "signal/advantage_pre_scale_std": 0.09303600341081619, "signal/advantage_std": 0.09303600341081619, "signal/brier_reward/centered_abs_mean": 0.09707934856414795, "signal/brier_reward/group_std_mean": 0.12730673998594283, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012134918570518493, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012134918570518493, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028879277408123016, "signal/confidence_uniqueness_reward/group_std_mean": 0.03636922165751457, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003609909676015377, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003609909676015377, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014877181965857744, "signal/frontier_aurc_reward/group_std_mean": 0.0023924733977764845, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6630155844031833e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6630155844031833e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1195068359375, "signal/frontier_coverage_1/group_std_mean": 0.15731086134910582, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002139172307215631, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002139172307215631, "signal/frontier_coverage_10/centered_abs_mean": 0.1195068359375, "signal/frontier_coverage_10/group_std_mean": 0.15731086134910582, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002139172307215631, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002139172307215631, "signal/frontier_coverage_15/centered_abs_mean": 0.09091014117002487, "signal/frontier_coverage_15/group_std_mean": 0.1199584573507309, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016272914595901965, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016272914595901965, "signal/frontier_coverage_20/centered_abs_mean": 0.05923491641879082, "signal/frontier_coverage_20/group_std_mean": 0.0771937534213066, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010603050119243561, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010603050119243561, "signal/frontier_coverage_25/centered_abs_mean": 0.053304193913936614, "signal/frontier_coverage_25/group_std_mean": 0.0676953986287117, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009541450766846537, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009541450766846537, "signal/frontier_coverage_5/centered_abs_mean": 0.1195068359375, "signal/frontier_coverage_5/group_std_mean": 0.15731086134910582, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002139172307215631, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002139172307215631, "signal/frontier_ece_reward/centered_abs_mean": 0.004334048368036747, "signal/frontier_ece_reward/group_std_mean": 0.0055789993144571785, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005417560460045934, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005417560460045934, "step": 220 }, { "calibration/aurc": 0.14470123646878835, "calibration/batch_distribution_entropy": 0.8739225583129173, "calibration/buffer_distribution_entropy": 0.9555200094663506, "calibration/confidence_entropy": 0.391513768008919, "calibration/coverage@0%": 0.0875, "calibration/coverage@1%": 0.0875, "calibration/coverage@10%": 0.5484375, "calibration/coverage@15%": 0.675, "calibration/coverage@20%": 0.72890625, "calibration/coverage@25%": 0.77265625, "calibration/coverage@30%": 0.81953125, "calibration/coverage@5%": 0.31953125, "calibration/ece": 0.15345659905795017, "calibration/mean_confidence": 0.6121585571920498, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 562.2, "completions/max_terminated_length": 562.2, "completions/mean_length": 214.69287109375, "completions/mean_terminated_length": 214.69287109375, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.72, "grad_norm": 0.0017858616774901748, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 758894456.0, "reward": 1.0460729122161865, "reward_std": 0.0672881230711937, "rewards/accuracy_reward": 0.62294921875, "rewards/brier_reward": 0.8469637155532836, "rewards/confidence_uniqueness_reward": 0.935894775390625, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.001939373160712421, "rewards/frontier_coverage_1": 0.11669339537620545, "rewards/frontier_coverage_10": 0.11649550646543502, "rewards/frontier_coverage_15": 0.09009001255035401, "rewards/frontier_coverage_20": 0.07372135147452355, "rewards/frontier_coverage_25": 0.1068428099155426, "rewards/frontier_coverage_5": 0.11669339537620545, "rewards/frontier_ece_reward": 0.005344946216791868, "signal/accuracy_reward/centered_abs_mean": 0.082562255859375, "signal/accuracy_reward/group_std_mean": 0.1146527960896492, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0412811279296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0412811279296875, "signal/advantage_abs_mean": 0.04909345507621765, "signal/advantage_pre_scale_abs_mean": 0.04909345507621765, "signal/advantage_pre_scale_std": 0.10091503411531448, "signal/advantage_std": 0.10091503411531448, "signal/brier_reward/centered_abs_mean": 0.0935791552066803, "signal/brier_reward/group_std_mean": 0.12280905842781067, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011697394400835037, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011697394400835037, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02970266342163086, "signal/confidence_uniqueness_reward/group_std_mean": 0.037385367602109906, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037128329277038574, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037128329277038574, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017587365116924047, "signal/frontier_aurc_reward/group_std_mean": 0.0027441283222287894, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1481383120990356e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1481383120990356e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10815362930297852, "signal/frontier_coverage_1/group_std_mean": 0.14316221177577973, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019359499448910356, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019359499448910356, "signal/frontier_coverage_10/centered_abs_mean": 0.10738050639629364, "signal/frontier_coverage_10/group_std_mean": 0.1421646863222122, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019221110735088587, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019221110735088587, "signal/frontier_coverage_15/centered_abs_mean": 0.0754195511341095, "signal/frontier_coverage_15/group_std_mean": 0.10019035190343857, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013500099536031484, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013500099536031484, "signal/frontier_coverage_20/centered_abs_mean": 0.052413633465766905, "signal/frontier_coverage_20/group_std_mean": 0.0678664654493332, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009382039890624583, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009382039890624583, "signal/frontier_coverage_25/centered_abs_mean": 0.05627275034785271, "signal/frontier_coverage_25/group_std_mean": 0.07161930799484253, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010072821867652237, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010072821867652237, "signal/frontier_coverage_5/centered_abs_mean": 0.10815362930297852, "signal/frontier_coverage_5/group_std_mean": 0.14316221177577973, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019359499448910356, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019359499448910356, "signal/frontier_ece_reward/centered_abs_mean": 0.0037975626531988383, "signal/frontier_ece_reward/group_std_mean": 0.004970707837492228, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004746953316498548, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004746953316498548, "step": 225 }, { "calibration/aurc": 0.10276625605061121, "calibration/batch_distribution_entropy": 0.7755610517250693, "calibration/buffer_distribution_entropy": 0.9506196997878165, "calibration/confidence_entropy": 0.3668382010462838, "calibration/coverage@0%": 0.16171875, "calibration/coverage@1%": 0.171875, "calibration/coverage@10%": 0.60703125, "calibration/coverage@15%": 0.70390625, "calibration/coverage@20%": 0.83125, "calibration/coverage@25%": 0.925, "calibration/coverage@30%": 0.94921875, "calibration/coverage@5%": 0.48046875, "calibration/ece": 0.09331710770831818, "calibration/mean_confidence": 0.7038330452083182, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 523.8, "completions/max_terminated_length": 523.8, "completions/mean_length": 220.8560546875, "completions/mean_terminated_length": 220.8560546875, "completions/min_length": 100.4, "completions/min_terminated_length": 100.4, "epoch": 0.736, "grad_norm": 0.002271299483254552, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 776095606.0, "reward": 1.0492503643035889, "reward_std": 0.06152931973338127, "rewards/accuracy_reward": 0.6287109375, "rewards/brier_reward": 0.8473744511604309, "rewards/confidence_uniqueness_reward": 0.9358478307723999, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0017740631010383368, "rewards/frontier_coverage_1": 0.11950143873691559, "rewards/frontier_coverage_10": 0.11690339148044586, "rewards/frontier_coverage_15": 0.09336267858743667, "rewards/frontier_coverage_20": 0.0775704950094223, "rewards/frontier_coverage_25": 0.11357748061418534, "rewards/frontier_coverage_5": 0.11950143873691559, "rewards/frontier_ece_reward": 0.004873855458572507, "signal/accuracy_reward/centered_abs_mean": 0.0777587890625, "signal/accuracy_reward/group_std_mean": 0.10373825207352638, "signal/accuracy_reward/group_zero_std_frac": 0.696875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03887939453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03887939453125, "signal/advantage_abs_mean": 0.046919054538011554, "signal/advantage_pre_scale_abs_mean": 0.046919054538011554, "signal/advantage_pre_scale_std": 0.0959189236164093, "signal/advantage_std": 0.0959189236164093, "signal/brier_reward/centered_abs_mean": 0.0924751952290535, "signal/brier_reward/group_std_mean": 0.1207397997379303, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011559399403631687, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011559399403631687, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029533731937408447, "signal/confidence_uniqueness_reward/group_std_mean": 0.0374361515045166, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003691716492176056, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003691716492176056, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.001516377995721996, "signal/frontier_aurc_reward/group_std_mean": 0.002370060421526432, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.714316433412023e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.714316433412023e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11420103460550309, "signal/frontier_coverage_1/group_std_mean": 0.15096487402915953, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020441983826458452, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020441983826458452, "signal/frontier_coverage_10/centered_abs_mean": 0.11071749776601791, "signal/frontier_coverage_10/group_std_mean": 0.14640629887580872, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019818432396277786, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019818432396277786, "signal/frontier_coverage_15/centered_abs_mean": 0.0779910683631897, "signal/frontier_coverage_15/group_std_mean": 0.1036263257265091, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013960400596261025, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013960400596261025, "signal/frontier_coverage_20/centered_abs_mean": 0.05327008962631226, "signal/frontier_coverage_20/group_std_mean": 0.06925814524292946, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009535345481708646, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009535345481708646, "signal/frontier_coverage_25/centered_abs_mean": 0.05671848207712173, "signal/frontier_coverage_25/group_std_mean": 0.07240066826343536, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001015260792337358, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001015260792337358, "signal/frontier_coverage_5/centered_abs_mean": 0.11420103460550309, "signal/frontier_coverage_5/group_std_mean": 0.15096487402915953, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020441983826458452, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020441983826458452, "signal/frontier_ece_reward/centered_abs_mean": 0.0037134474609047175, "signal/frontier_ece_reward/group_std_mean": 0.004857833497226238, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004641809326130897, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004641809326130897, "step": 230 }, { "calibration/aurc": 0.15163492450002442, "calibration/batch_distribution_entropy": 0.8904229891990763, "calibration/buffer_distribution_entropy": 0.9448809655937878, "calibration/confidence_entropy": 0.3949929669354096, "calibration/coverage@0%": 0.07835171568627451, "calibration/coverage@1%": 0.14788296568627451, "calibration/coverage@10%": 0.5339736519607843, "calibration/coverage@15%": 0.626219362745098, "calibration/coverage@20%": 0.7043719362745098, "calibration/coverage@25%": 0.7840992647058823, "calibration/coverage@30%": 0.8411642156862745, "calibration/coverage@5%": 0.3206341911764706, "calibration/ece": 0.13040471542112503, "calibration/mean_confidence": 0.5816119304946545, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 682.0, "completions/max_terminated_length": 494.4, "completions/mean_length": 218.71201171875, "completions/mean_terminated_length": 218.4533935546875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 0.752, "grad_norm": 0.001796262338757515, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 793562417.0, "reward": 1.0391303777694703, "reward_std": 0.06580677628517151, "rewards/accuracy_reward": 0.61123046875, "rewards/brier_reward": 0.8342607021331787, "rewards/confidence_uniqueness_reward": 0.9415198564529419, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0019529166864231228, "rewards/frontier_coverage_1": 0.1229474276304245, "rewards/frontier_coverage_10": 0.1206100896000862, "rewards/frontier_coverage_15": 0.09027208015322685, "rewards/frontier_coverage_20": 0.07027497664093971, "rewards/frontier_coverage_25": 0.09396415501832962, "rewards/frontier_coverage_5": 0.1229474276304245, "rewards/frontier_ece_reward": 0.004472200945019722, "signal/accuracy_reward/centered_abs_mean": 0.080157470703125, "signal/accuracy_reward/group_std_mean": 0.10887984037399293, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400787353515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0400787353515625, "signal/advantage_abs_mean": 0.04903002083301544, "signal/advantage_pre_scale_abs_mean": 0.04903002083301544, "signal/advantage_pre_scale_std": 0.1000540629029274, "signal/advantage_std": 0.1000540629029274, "signal/brier_reward/centered_abs_mean": 0.09556291699409485, "signal/brier_reward/group_std_mean": 0.124751777946949, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011945364624261856, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011945364624261856, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026840757578611374, "signal/confidence_uniqueness_reward/group_std_mean": 0.033848896622657776, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033550946973264217, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033550946973264217, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001602224470116198, "signal/frontier_aurc_reward/group_std_mean": 0.002494157268665731, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8679816023213788e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8679816023213788e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11630584448575973, "signal/frontier_coverage_1/group_std_mean": 0.15271863341331482, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002081874618306756, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002081874618306756, "signal/frontier_coverage_10/centered_abs_mean": 0.11303210407495498, "signal/frontier_coverage_10/group_std_mean": 0.14861542731523514, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002023274498060346, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002023274498060346, "signal/frontier_coverage_15/centered_abs_mean": 0.07724076434969902, "signal/frontier_coverage_15/group_std_mean": 0.10189598947763442, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013826095964759588, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013826095964759588, "signal/frontier_coverage_20/centered_abs_mean": 0.05278810262680054, "signal/frontier_coverage_20/group_std_mean": 0.06799793317914009, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009449069970287382, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009449069970287382, "signal/frontier_coverage_25/centered_abs_mean": 0.05864310711622238, "signal/frontier_coverage_25/group_std_mean": 0.0744215801358223, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010497116250917315, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010497116250917315, "signal/frontier_coverage_5/centered_abs_mean": 0.11630584448575973, "signal/frontier_coverage_5/group_std_mean": 0.15271863341331482, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002081874618306756, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002081874618306756, "signal/frontier_ece_reward/centered_abs_mean": 0.0036135178990662096, "signal/frontier_ece_reward/group_std_mean": 0.0046929454430937765, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004516897373832762, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004516897373832762, "step": 235 }, { "calibration/aurc": 0.1354460653145263, "calibration/batch_distribution_entropy": 0.908943110934102, "calibration/buffer_distribution_entropy": 0.940215096807232, "calibration/confidence_entropy": 0.4264768475786309, "calibration/coverage@0%": 0.24072610294117647, "calibration/coverage@1%": 0.2852573529411765, "calibration/coverage@10%": 0.5294638480392158, "calibration/coverage@15%": 0.63828125, "calibration/coverage@20%": 0.7453125, "calibration/coverage@25%": 0.81015625, "calibration/coverage@30%": 0.871875, "calibration/coverage@5%": 0.44806985294117646, "calibration/ece": 0.16837747358087166, "calibration/mean_confidence": 0.5540928773798913, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1317.8, "completions/max_terminated_length": 464.0, "completions/mean_length": 222.9625, "completions/mean_terminated_length": 222.44960021972656, "completions/min_length": 104.6, "completions/min_terminated_length": 104.6, "epoch": 0.768, "grad_norm": 0.0016375478589907289, "learning_rate": 1e-06, "loss": 0.0011, "num_tokens": 810778257.0, "reward": 1.0248207330703736, "reward_std": 0.061895406991243365, "rewards/accuracy_reward": 0.576953125, "rewards/brier_reward": 0.8413738012313843, "rewards/confidence_uniqueness_reward": 0.9406145691871644, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.001507855043746531, "rewards/frontier_coverage_1": 0.1511134535074234, "rewards/frontier_coverage_10": 0.14939744472503663, "rewards/frontier_coverage_15": 0.11022275984287262, "rewards/frontier_coverage_20": 0.08167696744203568, "rewards/frontier_coverage_25": 0.09813316464424134, "rewards/frontier_coverage_5": 0.1511134535074234, "rewards/frontier_ece_reward": 0.00472887079231441, "signal/accuracy_reward/centered_abs_mean": 0.07352294921875, "signal/accuracy_reward/group_std_mean": 0.09980905205011367, "signal/accuracy_reward/group_zero_std_frac": 0.709375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.036761474609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.036761474609375, "signal/advantage_abs_mean": 0.045818436145782473, "signal/advantage_pre_scale_abs_mean": 0.045818436145782473, "signal/advantage_pre_scale_std": 0.0930859088897705, "signal/advantage_std": 0.0930859088897705, "signal/brier_reward/centered_abs_mean": 0.09857990890741349, "signal/brier_reward/group_std_mean": 0.13005568087100983, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012322488613426686, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012322488613426686, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026614753901958464, "signal/confidence_uniqueness_reward/group_std_mean": 0.034846174716949466, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003326844237744808, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003326844237744808, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011487239389680326, "signal/frontier_aurc_reward/group_std_mean": 0.0017788737313821912, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.0562157442327588e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.0562157442327588e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1333732545375824, "signal/frontier_coverage_1/group_std_mean": 0.17638799846172332, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023873811587691307, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023873811587691307, "signal/frontier_coverage_10/centered_abs_mean": 0.13058110177516938, "signal/frontier_coverage_10/group_std_mean": 0.17269828617572786, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023374016396701335, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023374016396701335, "signal/frontier_coverage_15/centered_abs_mean": 0.08957252502441407, "signal/frontier_coverage_15/group_std_mean": 0.11884426325559616, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016033481108024717, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016033481108024717, "signal/frontier_coverage_20/centered_abs_mean": 0.06021819338202476, "signal/frontier_coverage_20/group_std_mean": 0.0782925844192505, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010779056698083877, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010779056698083877, "signal/frontier_coverage_25/centered_abs_mean": 0.057765302062034604, "signal/frontier_coverage_25/group_std_mean": 0.07384001463651657, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001033998851198703, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001033998851198703, "signal/frontier_coverage_5/centered_abs_mean": 0.1333732545375824, "signal/frontier_coverage_5/group_std_mean": 0.17638799846172332, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023873811587691307, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023873811587691307, "signal/frontier_ece_reward/centered_abs_mean": 0.0036720467731356623, "signal/frontier_ece_reward/group_std_mean": 0.004740559495985508, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004590058466419578, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004590058466419578, "step": 240 }, { "calibration/aurc": 0.1780989063016482, "calibration/batch_distribution_entropy": 0.8732530039163613, "calibration/buffer_distribution_entropy": 0.9368918455830914, "calibration/confidence_entropy": 0.37058566571107476, "calibration/coverage@0%": 0.16484375, "calibration/coverage@1%": 0.1953125, "calibration/coverage@10%": 0.44375, "calibration/coverage@15%": 0.503125, "calibration/coverage@20%": 0.5875, "calibration/coverage@25%": 0.71484375, "calibration/coverage@30%": 0.7625, "calibration/coverage@5%": 0.315625, "calibration/ece": 0.10408663335191717, "calibration/mean_confidence": 0.5442599486038305, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 693.4, "completions/max_terminated_length": 591.2, "completions/mean_length": 223.282421875, "completions/mean_terminated_length": 223.15485534667968, "completions/min_length": 110.4, "completions/min_terminated_length": 110.4, "epoch": 0.784, "grad_norm": 0.0017808079719543457, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 828239037.0, "reward": 1.0469671964645386, "reward_std": 0.0657818466424942, "rewards/accuracy_reward": 0.63125, "rewards/brier_reward": 0.8252038955688477, "rewards/confidence_uniqueness_reward": 0.9421940207481384, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0015878735110163688, "rewards/frontier_coverage_1": 0.10298990458250046, "rewards/frontier_coverage_10": 0.10325277298688888, "rewards/frontier_coverage_15": 0.08138184025883674, "rewards/frontier_coverage_20": 0.06994581818580628, "rewards/frontier_coverage_25": 0.10279036164283753, "rewards/frontier_coverage_5": 0.10298990458250046, "rewards/frontier_ece_reward": 0.0036763294599950315, "signal/accuracy_reward/centered_abs_mean": 0.08505859375, "signal/accuracy_reward/group_std_mean": 0.11480707228183747, "signal/accuracy_reward/group_zero_std_frac": 0.665625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042529296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042529296875, "signal/advantage_abs_mean": 0.04915754199028015, "signal/advantage_pre_scale_abs_mean": 0.04915754199028015, "signal/advantage_pre_scale_std": 0.09866253137588502, "signal/advantage_std": 0.09866253137588502, "signal/brier_reward/centered_abs_mean": 0.1016099825501442, "signal/brier_reward/group_std_mean": 0.13323958665132524, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012701247818768024, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012701247818768024, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02635921761393547, "signal/confidence_uniqueness_reward/group_std_mean": 0.033456063643097875, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032949022017419336, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032949022017419336, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001376147347036749, "signal/frontier_aurc_reward/group_std_mean": 0.002203846746124327, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.463303608237766e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.463303608237766e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13170208930969238, "signal/frontier_coverage_1/group_std_mean": 0.1737958937883377, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023574673570692537, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023574673570692537, "signal/frontier_coverage_10/centered_abs_mean": 0.12848464101552964, "signal/frontier_coverage_10/group_std_mean": 0.16970953047275544, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022998749278485774, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022998749278485774, "signal/frontier_coverage_15/centered_abs_mean": 0.08622983396053314, "signal/frontier_coverage_15/group_std_mean": 0.11427305340766906, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015435139182955026, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015435139182955026, "signal/frontier_coverage_20/centered_abs_mean": 0.05881091207265854, "signal/frontier_coverage_20/group_std_mean": 0.0768322467803955, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010527152917347848, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010527152917347848, "signal/frontier_coverage_25/centered_abs_mean": 0.05912056043744087, "signal/frontier_coverage_25/group_std_mean": 0.07621604949235916, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010582579649053513, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010582579649053513, "signal/frontier_coverage_5/centered_abs_mean": 0.13170208930969238, "signal/frontier_coverage_5/group_std_mean": 0.1737958937883377, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023574673570692537, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023574673570692537, "signal/frontier_ece_reward/centered_abs_mean": 0.003539442550390959, "signal/frontier_ece_reward/group_std_mean": 0.004590986762195826, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044243031879886985, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044243031879886985, "step": 245 }, { "calibration/aurc": 0.26110712774742, "calibration/batch_distribution_entropy": 0.8637357131440693, "calibration/buffer_distribution_entropy": 0.9338101197498352, "calibration/confidence_entropy": 0.41911873533648614, "calibration/coverage@0%": 0.025, "calibration/coverage@1%": 0.025, "calibration/coverage@10%": 0.30546875, "calibration/coverage@15%": 0.34453125, "calibration/coverage@20%": 0.54609375, "calibration/coverage@25%": 0.5859375, "calibration/coverage@30%": 0.63671875, "calibration/coverage@5%": 0.078125, "calibration/ece": 0.1646667575702236, "calibration/mean_confidence": 0.5018031308830292, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 694.4, "completions/max_terminated_length": 509.2, "completions/mean_length": 220.36611328125, "completions/mean_terminated_length": 220.2374237060547, "completions/min_length": 105.8, "completions/min_terminated_length": 105.8, "epoch": 0.8, "grad_norm": 0.002036831108853221, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 845506146.0, "reward": 1.0645583629608155, "reward_std": 0.06164888888597488, "rewards/accuracy_reward": 0.6568359375, "rewards/brier_reward": 0.8579505681991577, "rewards/confidence_uniqueness_reward": 0.9386770725250244, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.001275635720230639, "rewards/frontier_coverage_1": 0.11075811237096786, "rewards/frontier_coverage_10": 0.10875446647405625, "rewards/frontier_coverage_15": 0.08313208520412445, "rewards/frontier_coverage_20": 0.07641463130712509, "rewards/frontier_coverage_25": 0.13115044236183165, "rewards/frontier_coverage_5": 0.11075811237096786, "rewards/frontier_ece_reward": 0.004146079532802105, "signal/accuracy_reward/centered_abs_mean": 0.0808837890625, "signal/accuracy_reward/group_std_mean": 0.10505216717720031, "signal/accuracy_reward/group_zero_std_frac": 0.703125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04044189453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04044189453125, "signal/advantage_abs_mean": 0.04755032882094383, "signal/advantage_pre_scale_abs_mean": 0.04755032882094383, "signal/advantage_pre_scale_std": 0.09882079064846039, "signal/advantage_std": 0.09882079064846039, "signal/brier_reward/centered_abs_mean": 0.08948185741901397, "signal/brier_reward/group_std_mean": 0.1173609048128128, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011185232177376747, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011185232177376747, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028289894759654998, "signal/confidence_uniqueness_reward/group_std_mean": 0.035136304795742035, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0035362368449568748, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035362368449568748, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013092580833472312, "signal/frontier_aurc_reward/group_std_mean": 0.0021080786129459737, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.343571886740392e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.343571886740392e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11353515535593033, "signal/frontier_coverage_1/group_std_mean": 0.15011467039585114, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020322792232036592, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020322792232036592, "signal/frontier_coverage_10/centered_abs_mean": 0.10932374000549316, "signal/frontier_coverage_10/group_std_mean": 0.14465901702642442, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001956894900649786, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001956894900649786, "signal/frontier_coverage_15/centered_abs_mean": 0.07167089506983756, "signal/frontier_coverage_15/group_std_mean": 0.0952614426612854, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001282908977009356, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001282908977009356, "signal/frontier_coverage_20/centered_abs_mean": 0.05188070461153984, "signal/frontier_coverage_20/group_std_mean": 0.0674702912569046, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009286645916290581, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009286645916290581, "signal/frontier_coverage_25/centered_abs_mean": 0.06000246405601502, "signal/frontier_coverage_25/group_std_mean": 0.0765003427863121, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010740441037341952, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010740441037341952, "signal/frontier_coverage_5/centered_abs_mean": 0.11353515535593033, "signal/frontier_coverage_5/group_std_mean": 0.15011467039585114, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020322792232036592, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020322792232036592, "signal/frontier_ece_reward/centered_abs_mean": 0.0031503901816904547, "signal/frontier_ece_reward/group_std_mean": 0.004117331793531775, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00039379877271130683, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00039379877271130683, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4328469329018436, "eval_calibration/batch_distribution_entropy": 0.9017827126146071, "eval_calibration/buffer_distribution_entropy": 0.933450971645078, "eval_calibration/confidence_entropy": 0.4392779969401228, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.0625, "eval_calibration/coverage@20%": 0.25, "eval_calibration/coverage@25%": 0.3125, "eval_calibration/coverage@30%": 0.34375, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.214326634140625, "eval_calibration/mean_confidence": 0.5412016341406249, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 416.0, "eval_completions/max_terminated_length": 416.0, "eval_completions/mean_length": 219.94239044189453, "eval_completions/mean_terminated_length": 219.94239044189453, "eval_completions/min_length": 115.5, "eval_completions/min_terminated_length": 115.5, "eval_loss": 0.0, "eval_num_tokens": 845506146.0, "eval_reward": 0.9467860460281372, "eval_reward_std": 0.24670489132404327, "eval_rewards/accuracy_reward": 0.44140625, "eval_rewards/brier_reward": 0.7873809337615967, "eval_rewards/confidence_uniqueness_reward": 0.890869140625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004038012819364667, "eval_rewards/frontier_coverage_1": 0.20367811620235443, "eval_rewards/frontier_coverage_10": 0.19325406849384308, "eval_rewards/frontier_coverage_15": 0.12953777611255646, "eval_rewards/frontier_coverage_20": 0.08314738422632217, "eval_rewards/frontier_coverage_25": 0.06818825379014015, "eval_rewards/frontier_coverage_5": 0.20367811620235443, "eval_rewards/frontier_ece_reward": 0.004763010889291763, "eval_runtime": 10.7531, "eval_samples_per_second": 46.498, "eval_signal/accuracy_reward/centered_abs_mean": 0.473388671875, "eval_signal/accuracy_reward/group_std_mean": 0.4939229190349579, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2366943359375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2366943359375, "eval_signal/advantage_abs_mean": 0.22974882274866104, "eval_signal/advantage_pre_scale_abs_mean": 0.22974882274866104, "eval_signal/advantage_pre_scale_std": 0.2435239553451538, "eval_signal/advantage_std": 0.2435239553451538, "eval_signal/brier_reward/centered_abs_mean": 0.23106026649475098, "eval_signal/brier_reward/group_std_mean": 0.2880419045686722, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028882533311843872, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.028882533311843872, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0494232177734375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.058502499014139175, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0061779022216796875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0061779022216796875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.00543490145355463, "eval_signal/frontier_aurc_reward/group_std_mean": 0.010733058210462332, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.728474105941132e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.728474105941132e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.34148095548152924, "eval_signal/frontier_coverage_1/group_std_mean": 0.4213644117116928, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006112508941441774, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006112508941441774, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3236210346221924, "eval_signal/frontier_coverage_10/group_std_mean": 0.4000513255596161, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005792815936729312, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005792815936729312, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.20580045133829117, "eval_signal/frontier_coverage_15/group_std_mean": 0.2604397386312485, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036838280502706766, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036838280502706766, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.11519244313240051, "eval_signal/frontier_coverage_20/group_std_mean": 0.1483083888888359, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020619446877390146, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020619446877390146, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.1481623351573944, "eval_signal/frontier_coverage_25/group_std_mean": 0.19164805114269257, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00265210575889796, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00265210575889796, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.34148095548152924, "eval_signal/frontier_coverage_5/group_std_mean": 0.4213644117116928, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006112508941441774, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006112508941441774, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.006984395207837224, "eval_signal/frontier_ece_reward/group_std_mean": 0.008835344575345516, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000873049400979653, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000873049400979653, "eval_steps_per_second": 0.186, "step": 250 }, { "epoch": 0.8, "step": 250, "train_probe_calibration/aurc": 0.12986536721544725, "train_probe_calibration/batch_distribution_entropy": 0.811839844274961, "train_probe_calibration/buffer_distribution_entropy": 0.933494045269648, "train_probe_calibration/confidence_entropy": 0.354105295763641, "train_probe_calibration/coverage@0%": 0.140625, "train_probe_calibration/coverage@1%": 0.140625, "train_probe_calibration/coverage@10%": 0.609375, "train_probe_calibration/coverage@15%": 0.765625, "train_probe_calibration/coverage@20%": 0.828125, "train_probe_calibration/coverage@25%": 0.90625, "train_probe_calibration/coverage@30%": 0.921875, "train_probe_calibration/coverage@5%": 0.484375, "train_probe_calibration/ece": 0.13531250000000003, "train_probe_calibration/mean_confidence": 0.624875, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 376.0, "train_probe_completions/max_terminated_length": 376.0, "train_probe_completions/mean_length": 217.32789611816406, "train_probe_completions/mean_terminated_length": 217.32789611816406, "train_probe_completions/min_length": 120.5, "train_probe_completions/min_terminated_length": 120.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 845506146.0, "train_probe_reward": 1.0561645030975342, "train_probe_reward_std": 0.2325623854994774, "train_probe_rewards/accuracy_reward": 0.654296875, "train_probe_rewards/brier_reward": 0.8493243455886841, "train_probe_rewards/confidence_uniqueness_reward": 0.889404296875, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.001908503647428006, "train_probe_rewards/frontier_coverage_1": 0.11155515164136887, "train_probe_rewards/frontier_coverage_10": 0.10592306032776833, "train_probe_rewards/frontier_coverage_15": 0.08040037006139755, "train_probe_rewards/frontier_coverage_20": 0.0775928758084774, "train_probe_rewards/frontier_coverage_25": 0.1379874050617218, "train_probe_rewards/frontier_coverage_5": 0.11155515164136887, "train_probe_rewards/frontier_ece_reward": 0.004171320935711265, "train_probe_runtime": 10.1866, "train_probe_samples_per_second": 49.084, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4449462890625, "train_probe_signal/accuracy_reward/group_std_mean": 0.4788653701543808, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22247314453125, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22247314453125, "train_probe_signal/advantage_abs_mean": 0.21122215688228607, "train_probe_signal/advantage_pre_scale_abs_mean": 0.21122215688228607, "train_probe_signal/advantage_pre_scale_std": 0.22976724058389664, "train_probe_signal/advantage_std": 0.22976724058389664, "train_probe_signal/brier_reward/centered_abs_mean": 0.18194539844989777, "train_probe_signal/brier_reward/group_std_mean": 0.245933398604393, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02274317480623722, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.02274317480623722, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0509185791015625, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.061591994017362595, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063648223876953125, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063648223876953125, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.003361418261192739, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.006412317277863622, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.016938641550951e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.016938641550951e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3020322024822235, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.4129178822040558, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0054063762072473764, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0054063762072473764, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.2831447720527649, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.3897576928138733, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0050682914443314075, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0050682914443314075, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1778106540441513, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.2535991668701172, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031828106148168445, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031828106148168445, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1011722981929779, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.14043454825878143, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018109841039404273, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018109841039404273, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.14492832124233246, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.17802315205335617, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002594216726720333, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002594216726720333, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3020322024822235, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.4129178822040558, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0054063762072473764, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0054063762072473764, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.006162431091070175, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.008223664714023471, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007703038863837719, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007703038863837719, "train_probe_steps_per_second": 0.196 }, { "calibration/aurc": 0.2613374077664524, "calibration/batch_distribution_entropy": 0.8562545855944862, "calibration/buffer_distribution_entropy": 0.9332085807167052, "calibration/confidence_entropy": 0.36503547135583225, "calibration/coverage@0%": 0.0875, "calibration/coverage@1%": 0.0921875, "calibration/coverage@10%": 0.27109375, "calibration/coverage@15%": 0.346875, "calibration/coverage@20%": 0.40078125, "calibration/coverage@25%": 0.44921875, "calibration/coverage@30%": 0.6296875, "calibration/coverage@5%": 0.17265625, "calibration/ece": 0.14208048120424616, "calibration/mean_confidence": 0.5988117062957538, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 768.4, "completions/max_terminated_length": 564.4, "completions/mean_length": 213.418359375, "completions/mean_terminated_length": 213.2895263671875, "completions/min_length": 100.2, "completions/min_terminated_length": 100.2, "epoch": 0.816, "grad_norm": 0.0023187189362943172, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 862790718.0, "reward": 1.053348708152771, "reward_std": 0.06448897942900658, "rewards/accuracy_reward": 0.64423828125, "rewards/brier_reward": 0.8300428271293641, "rewards/confidence_uniqueness_reward": 0.9373907327651978, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002119234437122941, "rewards/frontier_coverage_1": 0.09330451190471649, "rewards/frontier_coverage_10": 0.09116496592760086, "rewards/frontier_coverage_15": 0.0719268336892128, "rewards/frontier_coverage_20": 0.07347770035266876, "rewards/frontier_coverage_25": 0.13299526423215866, "rewards/frontier_coverage_5": 0.09330451190471649, "rewards/frontier_ece_reward": 0.0034532100893557073, "signal/accuracy_reward/centered_abs_mean": 0.082476806640625, "signal/accuracy_reward/group_std_mean": 0.10868191868066787, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0412384033203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0412384033203125, "signal/advantage_abs_mean": 0.048784293979406354, "signal/advantage_pre_scale_abs_mean": 0.048784293979406354, "signal/advantage_pre_scale_std": 0.09951501935720444, "signal/advantage_std": 0.09951501935720444, "signal/brier_reward/centered_abs_mean": 0.10197662115097046, "signal/brier_reward/group_std_mean": 0.13159122467041015, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012747077643871308, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012747077643871308, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02778756096959114, "signal/confidence_uniqueness_reward/group_std_mean": 0.03503857851028443, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034734451211988924, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034734451211988924, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020503590581938624, "signal/frontier_aurc_reward/group_std_mean": 0.003305292781442404, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6701426142826674e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6701426142826674e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12355931252241134, "signal/frontier_coverage_1/group_std_mean": 0.1595274031162262, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022117116721346976, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022117116721346976, "signal/frontier_coverage_10/centered_abs_mean": 0.11546845138072967, "signal/frontier_coverage_10/group_std_mean": 0.1491788625717163, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020668851910158994, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020668851910158994, "signal/frontier_coverage_15/centered_abs_mean": 0.07564910650253295, "signal/frontier_coverage_15/group_std_mean": 0.09829453229904175, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013541190419346094, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013541190419346094, "signal/frontier_coverage_20/centered_abs_mean": 0.05537274181842804, "signal/frontier_coverage_20/group_std_mean": 0.0709018051624298, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000991172017529607, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000991172017529607, "signal/frontier_coverage_25/centered_abs_mean": 0.06835410594940186, "signal/frontier_coverage_25/group_std_mean": 0.08684322088956833, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012235384434461593, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012235384434461593, "signal/frontier_coverage_5/centered_abs_mean": 0.12355931252241134, "signal/frontier_coverage_5/group_std_mean": 0.1595274031162262, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022117116721346976, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022117116721346976, "signal/frontier_ece_reward/centered_abs_mean": 0.0032832324504852295, "signal/frontier_ece_reward/group_std_mean": 0.0042684660758823155, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004104040563106537, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004104040563106537, "step": 255 }, { "calibration/aurc": 0.29902088331269355, "calibration/batch_distribution_entropy": 0.8771544045312076, "calibration/buffer_distribution_entropy": 0.9328252547851499, "calibration/confidence_entropy": 0.3984631748431898, "calibration/coverage@0%": 0.18125, "calibration/coverage@1%": 0.18359375, "calibration/coverage@10%": 0.2484375, "calibration/coverage@15%": 0.26953125, "calibration/coverage@20%": 0.37109375, "calibration/coverage@25%": 0.5015625, "calibration/coverage@30%": 0.58515625, "calibration/coverage@5%": 0.21953125, "calibration/ece": 0.14786460040812144, "calibration/mean_confidence": 0.5620528753555174, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.8, "completions/max_terminated_length": 467.8, "completions/mean_length": 210.1916015625, "completions/mean_terminated_length": 210.1916015625, "completions/min_length": 95.4, "completions/min_terminated_length": 95.4, "epoch": 0.832, "grad_norm": 0.001390106393955648, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 879951432.0, "reward": 1.0441203117370605, "reward_std": 0.05937432199716568, "rewards/accuracy_reward": 0.61279296875, "rewards/brier_reward": 0.85388263463974, "rewards/confidence_uniqueness_reward": 0.9362106323242188, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0018755205674096942, "rewards/frontier_coverage_1": 0.1422416090965271, "rewards/frontier_coverage_10": 0.13426189720630646, "rewards/frontier_coverage_15": 0.09782664477825165, "rewards/frontier_coverage_20": 0.09015188366174698, "rewards/frontier_coverage_25": 0.1458705931901932, "rewards/frontier_coverage_5": 0.1422416090965271, "rewards/frontier_ece_reward": 0.004194558784365654, "signal/accuracy_reward/centered_abs_mean": 0.071063232421875, "signal/accuracy_reward/group_std_mean": 0.09939071238040924, "signal/accuracy_reward/group_zero_std_frac": 0.7, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0355316162109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0355316162109375, "signal/advantage_abs_mean": 0.043323104828596117, "signal/advantage_pre_scale_abs_mean": 0.043323104828596117, "signal/advantage_pre_scale_std": 0.09220470041036606, "signal/advantage_std": 0.09220470041036606, "signal/brier_reward/centered_abs_mean": 0.08896346092224121, "signal/brier_reward/group_std_mean": 0.11624416410923004, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011120432615280151, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011120432615280151, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027381277084350585, "signal/confidence_uniqueness_reward/group_std_mean": 0.03401793241500854, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003422659635543823, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003422659635543823, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018187327776104211, "signal/frontier_aurc_reward/group_std_mean": 0.0029950566589832307, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.255531628383323e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.255531628383323e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11554279178380966, "signal/frontier_coverage_1/group_std_mean": 0.1498140126466751, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002068215887993574, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002068215887993574, "signal/frontier_coverage_10/centered_abs_mean": 0.10581835210323334, "signal/frontier_coverage_10/group_std_mean": 0.13711453676223756, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018941484624519945, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018941484624519945, "signal/frontier_coverage_15/centered_abs_mean": 0.07099459692835808, "signal/frontier_coverage_15/group_std_mean": 0.09155822247266769, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012708032154478133, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012708032154478133, "signal/frontier_coverage_20/centered_abs_mean": 0.05304303243756294, "signal/frontier_coverage_20/group_std_mean": 0.066974838078022, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009494703030213713, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009494703030213713, "signal/frontier_coverage_25/centered_abs_mean": 0.06249256357550621, "signal/frontier_coverage_25/group_std_mean": 0.08098939657211304, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011186168296262622, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011186168296262622, "signal/frontier_coverage_5/centered_abs_mean": 0.11554279178380966, "signal/frontier_coverage_5/group_std_mean": 0.1498140126466751, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002068215887993574, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002068215887993574, "signal/frontier_ece_reward/centered_abs_mean": 0.003021185612305999, "signal/frontier_ece_reward/group_std_mean": 0.003916465956717729, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00037764820153824986, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00037764820153824986, "step": 260 }, { "calibration/aurc": 0.16050257914279836, "calibration/batch_distribution_entropy": 0.8431910556560627, "calibration/buffer_distribution_entropy": 0.931636889634975, "calibration/confidence_entropy": 0.3986462888374449, "calibration/coverage@0%": 0.12734375, "calibration/coverage@1%": 0.1640625, "calibration/coverage@10%": 0.41015625, "calibration/coverage@15%": 0.50234375, "calibration/coverage@20%": 0.76328125, "calibration/coverage@25%": 0.8515625, "calibration/coverage@30%": 0.93046875, "calibration/coverage@5%": 0.26171875, "calibration/ece": 0.14965263001674728, "calibration/mean_confidence": 0.6677016463180183, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 720.6, "completions/max_terminated_length": 503.2, "completions/mean_length": 205.6150390625, "completions/mean_terminated_length": 205.48502502441406, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.848, "grad_norm": 0.0016696910606697202, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 897071298.0, "reward": 1.0344059467315674, "reward_std": 0.05915949493646622, "rewards/accuracy_reward": 0.5978515625, "rewards/brier_reward": 0.8411754608154297, "rewards/confidence_uniqueness_reward": 0.9387550115585327, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0017502504400908948, "rewards/frontier_coverage_1": 0.13980764299631118, "rewards/frontier_coverage_10": 0.12554386407136917, "rewards/frontier_coverage_15": 0.09122110307216644, "rewards/frontier_coverage_20": 0.0802506908774376, "rewards/frontier_coverage_25": 0.12608129382133484, "rewards/frontier_coverage_5": 0.13980764299631118, "rewards/frontier_ece_reward": 0.00392393465153873, "signal/accuracy_reward/centered_abs_mean": 0.0715087890625, "signal/accuracy_reward/group_std_mean": 0.09724359661340713, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03575439453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03575439453125, "signal/advantage_abs_mean": 0.044149909913539884, "signal/advantage_pre_scale_abs_mean": 0.044149909913539884, "signal/advantage_pre_scale_std": 0.0919294998049736, "signal/advantage_std": 0.0919294998049736, "signal/brier_reward/centered_abs_mean": 0.0906279519200325, "signal/brier_reward/group_std_mean": 0.12001040577888489, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011328493990004063, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011328493990004063, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026908674091100693, "signal/confidence_uniqueness_reward/group_std_mean": 0.0335762545466423, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033635842613875867, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033635842613875867, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.001596922567114234, "signal/frontier_aurc_reward/group_std_mean": 0.0026214892510324716, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.858491352526471e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.858491352526471e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12007757127285004, "signal/frontier_coverage_1/group_std_mean": 0.16089081168174743, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002149388426914811, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002149388426914811, "signal/frontier_coverage_10/centered_abs_mean": 0.1089574933052063, "signal/frontier_coverage_10/group_std_mean": 0.14638633131980897, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019503391114994884, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019503391114994884, "signal/frontier_coverage_15/centered_abs_mean": 0.07160564810037613, "signal/frontier_coverage_15/group_std_mean": 0.0962700754404068, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012817410985007881, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012817410985007881, "signal/frontier_coverage_20/centered_abs_mean": 0.0523877888917923, "signal/frontier_coverage_20/group_std_mean": 0.06873219013214112, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009377413894981146, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009377413894981146, "signal/frontier_coverage_25/centered_abs_mean": 0.06207484975457191, "signal/frontier_coverage_25/group_std_mean": 0.08022152930498123, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011111397529020906, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011111397529020906, "signal/frontier_coverage_5/centered_abs_mean": 0.12007757127285004, "signal/frontier_coverage_5/group_std_mean": 0.16089081168174743, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002149388426914811, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002149388426914811, "signal/frontier_ece_reward/centered_abs_mean": 0.0029800481628626586, "signal/frontier_ece_reward/group_std_mean": 0.003989115683361888, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003725060203578323, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003725060203578323, "step": 265 }, { "calibration/aurc": 0.1268785574113444, "calibration/batch_distribution_entropy": 0.8655835602170416, "calibration/buffer_distribution_entropy": 0.930216643410773, "calibration/confidence_entropy": 0.38734716180223816, "calibration/coverage@0%": 0.37265625, "calibration/coverage@1%": 0.5234375, "calibration/coverage@10%": 0.6515625, "calibration/coverage@15%": 0.684375, "calibration/coverage@20%": 0.7078125, "calibration/coverage@25%": 0.7296875, "calibration/coverage@30%": 0.7578125, "calibration/coverage@5%": 0.60625, "calibration/ece": 0.18136702633101748, "calibration/mean_confidence": 0.6521458386893639, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 934.0, "completions/max_terminated_length": 527.6, "completions/mean_length": 207.2068359375, "completions/mean_terminated_length": 206.9475830078125, "completions/min_length": 102.4, "completions/min_terminated_length": 102.4, "epoch": 0.864, "grad_norm": 0.002203070791438222, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 914179912.0, "reward": 1.0581453800201417, "reward_std": 0.063059052079916, "rewards/accuracy_reward": 0.65205078125, "rewards/brier_reward": 0.836116099357605, "rewards/confidence_uniqueness_reward": 0.9410740494728088, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0015314666437916459, "rewards/frontier_coverage_1": 0.09230080395936965, "rewards/frontier_coverage_10": 0.08503075465559959, "rewards/frontier_coverage_15": 0.06719348207116127, "rewards/frontier_coverage_20": 0.07197408005595207, "rewards/frontier_coverage_25": 0.1352065086364746, "rewards/frontier_coverage_5": 0.09230080395936965, "rewards/frontier_ece_reward": 0.002868586964905262, "signal/accuracy_reward/centered_abs_mean": 0.080035400390625, "signal/accuracy_reward/group_std_mean": 0.10791658908128739, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400177001953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0400177001953125, "signal/advantage_abs_mean": 0.04706686735153198, "signal/advantage_pre_scale_abs_mean": 0.04706686735153198, "signal/advantage_pre_scale_std": 0.09770552664995194, "signal/advantage_std": 0.09770552664995194, "signal/brier_reward/centered_abs_mean": 0.09740178287029266, "signal/brier_reward/group_std_mean": 0.12451921701431275, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012175222858786583, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012175222858786583, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025182069465517997, "signal/confidence_uniqueness_reward/group_std_mean": 0.031810386851429936, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031477586831897496, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031477586831897496, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001567194890230894, "signal/frontier_aurc_reward/group_std_mean": 0.0025726008461788297, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8052787092747168e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8052787092747168e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12118019163608551, "signal/frontier_coverage_1/group_std_mean": 0.15813361406326293, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002169125364162028, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002169125364162028, "signal/frontier_coverage_10/centered_abs_mean": 0.10587679147720337, "signal/frontier_coverage_10/group_std_mean": 0.13836515247821807, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018951945239678025, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018951945239678025, "signal/frontier_coverage_15/centered_abs_mean": 0.07084731981158257, "signal/frontier_coverage_15/group_std_mean": 0.0918091282248497, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012681669555604457, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012681669555604457, "signal/frontier_coverage_20/centered_abs_mean": 0.05579846650362015, "signal/frontier_coverage_20/group_std_mean": 0.07049720138311386, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009987925528548657, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009987925528548657, "signal/frontier_coverage_25/centered_abs_mean": 0.06971824020147324, "signal/frontier_coverage_25/group_std_mean": 0.08754518479108811, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012479565106332303, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012479565106332303, "signal/frontier_coverage_5/centered_abs_mean": 0.12118019163608551, "signal/frontier_coverage_5/group_std_mean": 0.15813361406326293, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002169125364162028, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002169125364162028, "signal/frontier_ece_reward/centered_abs_mean": 0.0029707029927521942, "signal/frontier_ece_reward/group_std_mean": 0.0038403474260121583, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003713378740940243, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003713378740940243, "step": 270 }, { "calibration/aurc": 0.273963740303666, "calibration/batch_distribution_entropy": 0.8652895848583995, "calibration/buffer_distribution_entropy": 0.9298649992702075, "calibration/confidence_entropy": 0.37132087278349013, "calibration/coverage@0%": 0.10390625, "calibration/coverage@1%": 0.125, "calibration/coverage@10%": 0.25390625, "calibration/coverage@15%": 0.2765625, "calibration/coverage@20%": 0.38828125, "calibration/coverage@25%": 0.4703125, "calibration/coverage@30%": 0.5703125, "calibration/coverage@5%": 0.18046875, "calibration/ece": 0.16565321925298856, "calibration/mean_confidence": 0.5717535064279697, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 677.8, "completions/max_terminated_length": 465.0, "completions/mean_length": 201.7654296875, "completions/mean_terminated_length": 201.63529052734376, "completions/min_length": 101.4, "completions/min_terminated_length": 101.4, "epoch": 0.88, "grad_norm": 0.001780420308932662, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 931393062.0, "reward": 1.0194225072860719, "reward_std": 0.0662582591176033, "rewards/accuracy_reward": 0.57275390625, "rewards/brier_reward": 0.8221846461296082, "rewards/confidence_uniqueness_reward": 0.9409846425056457, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002486996748484671, "rewards/frontier_coverage_1": 0.14020877778530122, "rewards/frontier_coverage_10": 0.12404286712408066, "rewards/frontier_coverage_15": 0.08958611041307449, "rewards/frontier_coverage_20": 0.07855436801910401, "rewards/frontier_coverage_25": 0.11499525308609009, "rewards/frontier_coverage_5": 0.14020877778530122, "rewards/frontier_ece_reward": 0.00347807789221406, "signal/accuracy_reward/centered_abs_mean": 0.084307861328125, "signal/accuracy_reward/group_std_mean": 0.11367884427309036, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421539306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0421539306640625, "signal/advantage_abs_mean": 0.05013991966843605, "signal/advantage_pre_scale_abs_mean": 0.05013991966843605, "signal/advantage_pre_scale_std": 0.10045773237943649, "signal/advantage_std": 0.10045773237943649, "signal/brier_reward/centered_abs_mean": 0.10293448865413665, "signal/brier_reward/group_std_mean": 0.13437058925628662, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012866811081767082, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012866811081767082, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024662094563245772, "signal/confidence_uniqueness_reward/group_std_mean": 0.03065968081355095, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030827618204057215, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030827618204057215, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002529387711547315, "signal/frontier_aurc_reward/group_std_mean": 0.004110026638954878, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5276039600139484e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5276039600139484e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13029766380786895, "signal/frontier_coverage_1/group_std_mean": 0.17371802926063537, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002332328073680401, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002332328073680401, "signal/frontier_coverage_10/centered_abs_mean": 0.11453571021556855, "signal/frontier_coverage_10/group_std_mean": 0.15250465869903565, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020501891616731883, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020501891616731883, "signal/frontier_coverage_15/centered_abs_mean": 0.07582000344991684, "signal/frontier_coverage_15/group_std_mean": 0.10073214769363403, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001357178040780127, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001357178040780127, "signal/frontier_coverage_20/centered_abs_mean": 0.0566535584628582, "signal/frontier_coverage_20/group_std_mean": 0.0733156070113182, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010140986763872207, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010140986763872207, "signal/frontier_coverage_25/centered_abs_mean": 0.06789239197969436, "signal/frontier_coverage_25/group_std_mean": 0.0869957149028778, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012152737472206354, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012152737472206354, "signal/frontier_coverage_5/centered_abs_mean": 0.13029766380786895, "signal/frontier_coverage_5/group_std_mean": 0.17371802926063537, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002332328073680401, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002332328073680401, "signal/frontier_ece_reward/centered_abs_mean": 0.003146560303866863, "signal/frontier_ece_reward/group_std_mean": 0.004122556420043111, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003933200379833579, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003933200379833579, "step": 275 }, { "calibration/aurc": 0.25215917817714806, "calibration/batch_distribution_entropy": 0.8739498979185845, "calibration/buffer_distribution_entropy": 0.930060328102031, "calibration/confidence_entropy": 0.39616207217016336, "calibration/coverage@0%": 0.103125, "calibration/coverage@1%": 0.13125, "calibration/coverage@10%": 0.45390625, "calibration/coverage@15%": 0.490625, "calibration/coverage@20%": 0.5234375, "calibration/coverage@25%": 0.546875, "calibration/coverage@30%": 0.56640625, "calibration/coverage@5%": 0.22109375, "calibration/ece": 0.1740826497875611, "calibration/mean_confidence": 0.5997064484977905, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 661.8, "completions/max_terminated_length": 443.6, "completions/mean_length": 200.58515625, "completions/mean_terminated_length": 200.45480651855468, "completions/min_length": 96.6, "completions/min_terminated_length": 96.6, "epoch": 0.896, "grad_norm": 0.001766073633916676, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 948557902.0, "reward": 1.0397424459457398, "reward_std": 0.06280734091997146, "rewards/accuracy_reward": 0.6123046875, "rewards/brier_reward": 0.8335294604301453, "rewards/confidence_uniqueness_reward": 0.94145667552948, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002396345266606659, "rewards/frontier_coverage_1": 0.12290604412555695, "rewards/frontier_coverage_10": 0.10812564045190812, "rewards/frontier_coverage_15": 0.08217538744211197, "rewards/frontier_coverage_20": 0.07664992213249207, "rewards/frontier_coverage_25": 0.12757501602172852, "rewards/frontier_coverage_5": 0.12290604412555695, "rewards/frontier_ece_reward": 0.0031625948380678893, "signal/accuracy_reward/centered_abs_mean": 0.07562255859375, "signal/accuracy_reward/group_std_mean": 0.10868183225393295, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037811279296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.037811279296875, "signal/advantage_abs_mean": 0.04412608295679092, "signal/advantage_pre_scale_abs_mean": 0.04412608295679092, "signal/advantage_pre_scale_std": 0.09516832679510116, "signal/advantage_std": 0.09516832679510116, "signal/brier_reward/centered_abs_mean": 0.09220918267965317, "signal/brier_reward/group_std_mean": 0.12125321626663207, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011526147834956646, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011526147834956646, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023380208760499954, "signal/confidence_uniqueness_reward/group_std_mean": 0.02966206856071949, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029225260950624943, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029225260950624943, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021038307808339597, "signal/frontier_aurc_reward/group_std_mean": 0.0033327710116282105, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7658572182408534e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7658572182408534e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.11688004732131958, "signal/frontier_coverage_1/group_std_mean": 0.15271745324134828, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00209215278737247, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00209215278737247, "signal/frontier_coverage_10/centered_abs_mean": 0.10004872977733612, "signal/frontier_coverage_10/group_std_mean": 0.13066715896129608, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001790872262790799, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001790872262790799, "signal/frontier_coverage_15/centered_abs_mean": 0.06665360033512116, "signal/frontier_coverage_15/group_std_mean": 0.08639876991510391, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011930993758141994, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011930993758141994, "signal/frontier_coverage_20/centered_abs_mean": 0.05205147713422775, "signal/frontier_coverage_20/group_std_mean": 0.06605355590581893, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009317214018665255, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009317214018665255, "signal/frontier_coverage_25/centered_abs_mean": 0.06579188704490661, "signal/frontier_coverage_25/group_std_mean": 0.08515497148036957, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011776747182011605, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011776747182011605, "signal/frontier_coverage_5/centered_abs_mean": 0.11688004732131958, "signal/frontier_coverage_5/group_std_mean": 0.15271745324134828, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00209215278737247, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00209215278737247, "signal/frontier_ece_reward/centered_abs_mean": 0.0026048448868095874, "signal/frontier_ece_reward/group_std_mean": 0.0034122115466743708, "signal/frontier_ece_reward/group_zero_std_frac": 0.040625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003256056108511984, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003256056108511984, "step": 280 }, { "calibration/aurc": 0.2771183806359747, "calibration/batch_distribution_entropy": 0.8578019537959676, "calibration/buffer_distribution_entropy": 0.9298157417811967, "calibration/confidence_entropy": 0.38584274130978946, "calibration/coverage@0%": 0.19453125, "calibration/coverage@1%": 0.20078125, "calibration/coverage@10%": 0.31953125, "calibration/coverage@15%": 0.41953125, "calibration/coverage@20%": 0.4734375, "calibration/coverage@25%": 0.59375, "calibration/coverage@30%": 0.62734375, "calibration/coverage@5%": 0.25234375, "calibration/ece": 0.1462862952874513, "calibration/mean_confidence": 0.5480367561507691, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.0, "completions/max_terminated_length": 504.0, "completions/mean_length": 203.267578125, "completions/mean_terminated_length": 203.267578125, "completions/min_length": 97.6, "completions/min_terminated_length": 97.6, "epoch": 0.912, "grad_norm": 0.0015597037272527814, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 965690658.0, "reward": 1.02649986743927, "reward_std": 0.06281092613935471, "rewards/accuracy_reward": 0.58515625, "rewards/brier_reward": 0.8292442321777344, "rewards/confidence_uniqueness_reward": 0.946649169921875, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0018648097291588783, "rewards/frontier_coverage_1": 0.13054397702217102, "rewards/frontier_coverage_10": 0.11399659514427185, "rewards/frontier_coverage_15": 0.08423100709915161, "rewards/frontier_coverage_20": 0.07361575737595558, "rewards/frontier_coverage_25": 0.1144769087433815, "rewards/frontier_coverage_5": 0.13054397702217102, "rewards/frontier_ece_reward": 0.0030390231404453516, "signal/accuracy_reward/centered_abs_mean": 0.079638671875, "signal/accuracy_reward/group_std_mean": 0.10629072934389114, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0398193359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0398193359375, "signal/advantage_abs_mean": 0.04734830111265183, "signal/advantage_pre_scale_abs_mean": 0.04734830111265183, "signal/advantage_pre_scale_std": 0.09430029839277268, "signal/advantage_std": 0.09430029839277268, "signal/brier_reward/centered_abs_mean": 0.09894435703754426, "signal/brier_reward/group_std_mean": 0.12911319881677627, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012368044629693032, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012368044629693032, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022022104263305663, "signal/confidence_uniqueness_reward/group_std_mean": 0.027249596640467645, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002752763032913208, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002752763032913208, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014280044939368962, "signal/frontier_aurc_reward/group_std_mean": 0.0022502636536955835, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5561279471730813e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5561279471730813e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1327954038977623, "signal/frontier_coverage_1/group_std_mean": 0.17255037724971772, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002377037703990936, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002377037703990936, "signal/frontier_coverage_10/centered_abs_mean": 0.11172600984573364, "signal/frontier_coverage_10/group_std_mean": 0.14537906944751738, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019998955307528377, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019998955307528377, "signal/frontier_coverage_15/centered_abs_mean": 0.07505722343921661, "signal/frontier_coverage_15/group_std_mean": 0.09774749577045441, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013435242231935262, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013435242231935262, "signal/frontier_coverage_20/centered_abs_mean": 0.0550868459045887, "signal/frontier_coverage_20/group_std_mean": 0.07034202218055725, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.000986054469831288, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000986054469831288, "signal/frontier_coverage_25/centered_abs_mean": 0.06590208411216736, "signal/frontier_coverage_25/group_std_mean": 0.08453426957130432, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011796473059803247, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011796473059803247, "signal/frontier_coverage_5/centered_abs_mean": 0.1327954038977623, "signal/frontier_coverage_5/group_std_mean": 0.17255037724971772, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002377037703990936, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002377037703990936, "signal/frontier_ece_reward/centered_abs_mean": 0.002762398170307279, "signal/frontier_ece_reward/group_std_mean": 0.003595150355249643, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00034529977128840985, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00034529977128840985, "step": 285 }, { "calibration/aurc": 0.18523557038518218, "calibration/batch_distribution_entropy": 0.9283445164377936, "calibration/buffer_distribution_entropy": 0.9310390835813201, "calibration/confidence_entropy": 0.44306056288304363, "calibration/coverage@0%": 0.08046875, "calibration/coverage@1%": 0.08046875, "calibration/coverage@10%": 0.40859375, "calibration/coverage@15%": 0.51640625, "calibration/coverage@20%": 0.59140625, "calibration/coverage@25%": 0.66015625, "calibration/coverage@30%": 0.7515625, "calibration/coverage@5%": 0.21484375, "calibration/ece": 0.15614833606636924, "calibration/mean_confidence": 0.5529057898582223, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 464.0, "completions/max_terminated_length": 464.0, "completions/mean_length": 201.409375, "completions/mean_terminated_length": 201.409375, "completions/min_length": 93.6, "completions/min_terminated_length": 93.6, "epoch": 0.928, "grad_norm": 0.0014505106955766678, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 982779906.0, "reward": 1.0353971242904663, "reward_std": 0.06244761645793915, "rewards/accuracy_reward": 0.6052734375, "rewards/brier_reward": 0.8278081059455872, "rewards/confidence_uniqueness_reward": 0.9446945190429688, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0016608674312010407, "rewards/frontier_coverage_1": 0.11747038513422012, "rewards/frontier_coverage_10": 0.10292258858680725, "rewards/frontier_coverage_15": 0.07811000794172288, "rewards/frontier_coverage_20": 0.07272942364215851, "rewards/frontier_coverage_25": 0.12077962756156921, "rewards/frontier_coverage_5": 0.11747038513422012, "rewards/frontier_ece_reward": 0.0025405031628906727, "signal/accuracy_reward/centered_abs_mean": 0.0777587890625, "signal/accuracy_reward/group_std_mean": 0.10788596123456955, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03887939453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03887939453125, "signal/advantage_abs_mean": 0.0458635076880455, "signal/advantage_pre_scale_abs_mean": 0.0458635076880455, "signal/advantage_pre_scale_std": 0.09391386359930039, "signal/advantage_std": 0.09391386359930039, "signal/brier_reward/centered_abs_mean": 0.09663857668638229, "signal/brier_reward/group_std_mean": 0.12685683369636536, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012079822085797786, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012079822085797786, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02300581932067871, "signal/confidence_uniqueness_reward/group_std_mean": 0.028626967594027518, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028757274150848387, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028757274150848387, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015263804234564304, "signal/frontier_aurc_reward/group_std_mean": 0.0025658855913206933, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.7322209280100652e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.7322209280100652e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13058804869651794, "signal/frontier_coverage_1/group_std_mean": 0.17300075590610503, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002337525924667716, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002337525924667716, "signal/frontier_coverage_10/centered_abs_mean": 0.10771108269691468, "signal/frontier_coverage_10/group_std_mean": 0.14299528300762177, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019280282547697424, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019280282547697424, "signal/frontier_coverage_15/centered_abs_mean": 0.0730916753411293, "signal/frontier_coverage_15/group_std_mean": 0.09659909605979919, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013083409518003463, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013083409518003463, "signal/frontier_coverage_20/centered_abs_mean": 0.05504903867840767, "signal/frontier_coverage_20/group_std_mean": 0.07065875232219695, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009853777824901044, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009853777824901044, "signal/frontier_coverage_25/centered_abs_mean": 0.06587158292531967, "signal/frontier_coverage_25/group_std_mean": 0.08444809466600418, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001179101294837892, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001179101294837892, "signal/frontier_coverage_5/centered_abs_mean": 0.13058804869651794, "signal/frontier_coverage_5/group_std_mean": 0.17300075590610503, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002337525924667716, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002337525924667716, "signal/frontier_ece_reward/centered_abs_mean": 0.002657411713153124, "signal/frontier_ece_reward/group_std_mean": 0.003528282977640629, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003321764641441405, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003321764641441405, "step": 290 }, { "calibration/aurc": 0.227605377702519, "calibration/batch_distribution_entropy": 0.8678024925217093, "calibration/buffer_distribution_entropy": 0.9332008553883091, "calibration/confidence_entropy": 0.39428912795582655, "calibration/coverage@0%": 0.16796875, "calibration/coverage@1%": 0.17578125, "calibration/coverage@10%": 0.3359375, "calibration/coverage@15%": 0.415625, "calibration/coverage@20%": 0.48515625, "calibration/coverage@25%": 0.56015625, "calibration/coverage@30%": 0.61484375, "calibration/coverage@5%": 0.2453125, "calibration/ece": 0.09917310585392751, "calibration/mean_confidence": 0.4722144451024394, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 920.0, "completions/max_terminated_length": 489.2, "completions/mean_length": 202.98466796875, "completions/mean_terminated_length": 202.72425537109376, "completions/min_length": 100.2, "completions/min_terminated_length": 100.2, "epoch": 0.944, "grad_norm": 0.0016979072242975235, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 999833893.0, "reward": 1.0405084133148192, "reward_std": 0.07115750387310982, "rewards/accuracy_reward": 0.616015625, "rewards/brier_reward": 0.8274973273277283, "rewards/confidence_uniqueness_reward": 0.9424091815948487, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.001480784686282277, "rewards/frontier_coverage_1": 0.11875949800014496, "rewards/frontier_coverage_10": 0.10412099286913871, "rewards/frontier_coverage_15": 0.08062837272882462, "rewards/frontier_coverage_20": 0.0747826412320137, "rewards/frontier_coverage_25": 0.12094295620918274, "rewards/frontier_coverage_5": 0.11873992830514908, "rewards/frontier_ece_reward": 0.0025975925382226706, "signal/accuracy_reward/centered_abs_mean": 0.10352783203125, "signal/accuracy_reward/group_std_mean": 0.1342850521206856, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051763916015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051763916015625, "signal/advantage_abs_mean": 0.054445850849151614, "signal/advantage_pre_scale_abs_mean": 0.054445850849151614, "signal/advantage_pre_scale_std": 0.10500096529722214, "signal/advantage_std": 0.10500096529722214, "signal/brier_reward/centered_abs_mean": 0.1040783628821373, "signal/brier_reward/group_std_mean": 0.13488138020038604, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013009795360267163, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013009795360267163, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02404037192463875, "signal/confidence_uniqueness_reward/group_std_mean": 0.03030591309070587, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030050464905798436, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030050464905798436, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001270879921503365, "signal/frontier_aurc_reward/group_std_mean": 0.0021014282014220954, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.2748749870515894e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.2748749870515894e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14869227409362792, "signal/frontier_coverage_1/group_std_mean": 0.19285742044448853, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026615916285663843, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026615916285663843, "signal/frontier_coverage_10/centered_abs_mean": 0.11862881183624267, "signal/frontier_coverage_10/group_std_mean": 0.15488066375255585, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021234555868431928, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021234555868431928, "signal/frontier_coverage_15/centered_abs_mean": 0.07927502691745758, "signal/frontier_coverage_15/group_std_mean": 0.10365805774927139, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014190229121595621, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014190229121595621, "signal/frontier_coverage_20/centered_abs_mean": 0.05630268827080727, "signal/frontier_coverage_20/group_std_mean": 0.0722155287861824, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010078180697746576, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010078180697746576, "signal/frontier_coverage_25/centered_abs_mean": 0.06522702798247337, "signal/frontier_coverage_25/group_std_mean": 0.08425245583057403, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00116756372153759, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00116756372153759, "signal/frontier_coverage_5/centered_abs_mean": 0.14861850142478944, "signal/frontier_coverage_5/group_std_mean": 0.19274679124355315, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002660271106287837, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002660271106287837, "signal/frontier_ece_reward/centered_abs_mean": 0.002932385681197047, "signal/frontier_ece_reward/group_std_mean": 0.0038601367734372614, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003665482101496309, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003665482101496309, "step": 295 }, { "calibration/aurc": 0.22753061737104918, "calibration/batch_distribution_entropy": 0.8395263422525059, "calibration/buffer_distribution_entropy": 0.9327987097688348, "calibration/confidence_entropy": 0.3778946036635543, "calibration/coverage@0%": 0.21328125, "calibration/coverage@1%": 0.215625, "calibration/coverage@10%": 0.4578125, "calibration/coverage@15%": 0.50625, "calibration/coverage@20%": 0.57734375, "calibration/coverage@25%": 0.63671875, "calibration/coverage@30%": 0.690625, "calibration/coverage@5%": 0.4203125, "calibration/ece": 0.21041953124999999, "calibration/mean_confidence": 0.63975046875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 683.0, "completions/max_terminated_length": 455.4, "completions/mean_length": 202.90927734375, "completions/mean_terminated_length": 202.77940368652344, "completions/min_length": 99.6, "completions/min_terminated_length": 99.6, "epoch": 0.96, "grad_norm": 0.0016687435563653708, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 1016852004.0, "reward": 1.0291436433792114, "reward_std": 0.0583199568092823, "rewards/accuracy_reward": 0.58271484375, "rewards/brier_reward": 0.8477208733558654, "rewards/confidence_uniqueness_reward": 0.9432453274726867, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0021191579522565006, "rewards/frontier_coverage_1": 0.1549065351486206, "rewards/frontier_coverage_10": 0.12819213569164276, "rewards/frontier_coverage_15": 0.09457356631755828, "rewards/frontier_coverage_20": 0.08866416066884994, "rewards/frontier_coverage_25": 0.14134843051433563, "rewards/frontier_coverage_5": 0.15478427112102508, "rewards/frontier_ece_reward": 0.0032231774181127547, "signal/accuracy_reward/centered_abs_mean": 0.072796630859375, "signal/accuracy_reward/group_std_mean": 0.10131096243858337, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363983154296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0363983154296875, "signal/advantage_abs_mean": 0.042303390055894854, "signal/advantage_pre_scale_abs_mean": 0.042303390055894854, "signal/advantage_pre_scale_std": 0.0915198415517807, "signal/advantage_std": 0.0915198415517807, "signal/brier_reward/centered_abs_mean": 0.08765042722225189, "signal/brier_reward/group_std_mean": 0.11655679643154145, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010956303402781486, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.010956303402781486, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024246321246027946, "signal/confidence_uniqueness_reward/group_std_mean": 0.030507474020123482, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030307901557534932, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030307901557534932, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017094084527343512, "signal/frontier_aurc_reward/group_std_mean": 0.0029599607922136785, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.059841037611477e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.059841037611477e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12009998559951782, "signal/frontier_coverage_1/group_std_mean": 0.15766243636608124, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002149789733812213, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002149789733812213, "signal/frontier_coverage_10/centered_abs_mean": 0.09564173370599746, "signal/frontier_coverage_10/group_std_mean": 0.12548429369926453, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017119870288297534, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017119870288297534, "signal/frontier_coverage_15/centered_abs_mean": 0.06683021634817124, "signal/frontier_coverage_15/group_std_mean": 0.08725652545690536, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011962608667090535, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011962608667090535, "signal/frontier_coverage_20/centered_abs_mean": 0.052955988049507144, "signal/frontier_coverage_20/group_std_mean": 0.06789801940321923, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009479121654294431, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009479121654294431, "signal/frontier_coverage_25/centered_abs_mean": 0.06407563537359237, "signal/frontier_coverage_25/group_std_mean": 0.08428025245666504, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001146953902207315, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001146953902207315, "signal/frontier_coverage_5/centered_abs_mean": 0.1199414610862732, "signal/frontier_coverage_5/group_std_mean": 0.15745915472507477, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002146952087059617, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002146952087059617, "signal/frontier_ece_reward/centered_abs_mean": 0.0024932647589594125, "signal/frontier_ece_reward/group_std_mean": 0.003281328594312072, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00031165809486992656, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00031165809486992656, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.40288964253104903, "eval_calibration/batch_distribution_entropy": 0.9055634924361762, "eval_calibration/buffer_distribution_entropy": 0.9315158471490221, "eval_calibration/confidence_entropy": 0.4507296505092381, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.15625, "eval_calibration/coverage@20%": 0.15625, "eval_calibration/coverage@25%": 0.28125, "eval_calibration/coverage@30%": 0.3125, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.2239203125, "eval_calibration/mean_confidence": 0.5807953125, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 390.0, "eval_completions/max_terminated_length": 390.0, "eval_completions/mean_length": 201.66341400146484, "eval_completions/mean_terminated_length": 201.66341400146484, "eval_completions/min_length": 103.0, "eval_completions/min_terminated_length": 103.0, "eval_loss": 0.0, "eval_num_tokens": 1016852004.0, "eval_reward": 0.9454332888126373, "eval_reward_std": 0.25646254420280457, "eval_rewards/accuracy_reward": 0.44140625, "eval_rewards/brier_reward": 0.7841920852661133, "eval_rewards/confidence_uniqueness_reward": 0.8974609375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.005405109841376543, "eval_rewards/frontier_coverage_1": 0.19863545149564743, "eval_rewards/frontier_coverage_10": 0.15553244948387146, "eval_rewards/frontier_coverage_15": 0.10400541499257088, "eval_rewards/frontier_coverage_20": 0.06785453855991364, "eval_rewards/frontier_coverage_25": 0.06973126530647278, "eval_rewards/frontier_coverage_5": 0.19848963618278503, "eval_rewards/frontier_ece_reward": 0.0032259345753118396, "eval_runtime": 10.2094, "eval_samples_per_second": 48.974, "eval_signal/accuracy_reward/centered_abs_mean": 0.4755859375, "eval_signal/accuracy_reward/group_std_mean": 0.49512895941734314, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23779296875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23779296875, "eval_signal/advantage_abs_mean": 0.24011892080307007, "eval_signal/advantage_pre_scale_abs_mean": 0.24011892080307007, "eval_signal/advantage_pre_scale_std": 0.2530638575553894, "eval_signal/advantage_std": 0.2530638575553894, "eval_signal/brier_reward/centered_abs_mean": 0.2406034916639328, "eval_signal/brier_reward/group_std_mean": 0.29827988147735596, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0300754364579916, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0300754364579916, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0444183349609375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.051535068079829216, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055522918701171875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055522918701171875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007637398317456245, "eval_signal/frontier_aurc_reward/group_std_mean": 0.01684427261352539, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00013670942280441523, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00013670942280441523, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.31974154710769653, "eval_signal/frontier_coverage_1/group_std_mean": 0.3967055380344391, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005723373498767614, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005723373498767614, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.2480403035879135, "eval_signal/frontier_coverage_10/group_std_mean": 0.31127846240997314, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004439921351149678, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004439921351149678, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.15623818337917328, "eval_signal/frontier_coverage_15/group_std_mean": 0.20206287503242493, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027966632042080164, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027966632042080164, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.10376439616084099, "eval_signal/frontier_coverage_20/group_std_mean": 0.13048581779003143, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018573826528154314, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018573826528154314, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.19033470749855042, "eval_signal/frontier_coverage_25/group_std_mean": 0.24326416850090027, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034069910179823637, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034069910179823637, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.31926435232162476, "eval_signal/frontier_coverage_5/group_std_mean": 0.3961242437362671, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0057148318737745285, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0057148318737745285, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.004807816818356514, "eval_signal/frontier_ece_reward/group_std_mean": 0.00625448627397418, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006009771022945642, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006009771022945642, "eval_steps_per_second": 0.196, "step": 300 }, { "epoch": 0.96, "step": 300, "train_probe_calibration/aurc": 0.11477367801974697, "train_probe_calibration/batch_distribution_entropy": 0.8259865898626315, "train_probe_calibration/buffer_distribution_entropy": 0.9317202950173966, "train_probe_calibration/confidence_entropy": 0.39752484027856694, "train_probe_calibration/coverage@0%": 0.140625, "train_probe_calibration/coverage@1%": 0.140625, "train_probe_calibration/coverage@10%": 0.734375, "train_probe_calibration/coverage@15%": 0.8125, "train_probe_calibration/coverage@20%": 0.875, "train_probe_calibration/coverage@25%": 0.9375, "train_probe_calibration/coverage@30%": 0.96875, "train_probe_calibration/coverage@5%": 0.421875, "train_probe_calibration/ece": 0.16384375, "train_probe_calibration/mean_confidence": 0.65728125, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 349.0, "train_probe_completions/max_terminated_length": 349.0, "train_probe_completions/mean_length": 201.03668975830078, "train_probe_completions/mean_terminated_length": 201.03668975830078, "train_probe_completions/min_length": 111.5, "train_probe_completions/min_terminated_length": 111.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 1016852004.0, "train_probe_reward": 1.0789863467216492, "train_probe_reward_std": 0.22811973094940186, "train_probe_rewards/accuracy_reward": 0.693359375, "train_probe_rewards/brier_reward": 0.8687321543693542, "train_probe_rewards/confidence_uniqueness_reward": 0.900390625, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.001137724844738841, "train_probe_rewards/frontier_coverage_1": 0.09572022780776024, "train_probe_rewards/frontier_coverage_10": 0.0828697718679905, "train_probe_rewards/frontier_coverage_15": 0.07084467262029648, "train_probe_rewards/frontier_coverage_20": 0.08797503262758255, "train_probe_rewards/frontier_coverage_25": 0.17389176040887833, "train_probe_rewards/frontier_coverage_5": 0.09541856124997139, "train_probe_rewards/frontier_ece_reward": 0.0026104446733370423, "train_probe_runtime": 9.6991, "train_probe_samples_per_second": 51.551, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4151611328125, "train_probe_signal/accuracy_reward/group_std_mean": 0.46192415058612823, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.20758056640625, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20758056640625, "train_probe_signal/advantage_abs_mean": 0.20193417370319366, "train_probe_signal/advantage_pre_scale_abs_mean": 0.20193417370319366, "train_probe_signal/advantage_pre_scale_std": 0.2256685495376587, "train_probe_signal/advantage_std": 0.2256685495376587, "train_probe_signal/brier_reward/centered_abs_mean": 0.15471985936164856, "train_probe_signal/brier_reward/group_std_mean": 0.21638543158769608, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01933998242020607, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01933998242020607, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.039031982421875, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.04604136198759079, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004878997802734375, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004878997802734375, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0020724779460579157, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.003993918187916279, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.709735210577492e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.709735210577492e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2661040276288986, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.37349459528923035, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004763261880725622, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004763261880725622, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.20318175852298737, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.28969065845012665, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036369531881064177, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036369531881064177, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.12607631087303162, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.18381474167108536, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022567659616470337, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022567659616470337, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.08980197459459305, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.11331581324338913, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016074551967903972, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016074551967903972, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.17201132327318192, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.2077884078025818, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0030790024902671576, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030790024902671576, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.26485244929790497, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3718564957380295, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004740858683362603, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004740858683362603, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.004174819332547486, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.005938299465924501, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005218524165684357, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005218524165684357, "train_probe_steps_per_second": 0.206 }, { "calibration/aurc": 0.19888012009032868, "calibration/batch_distribution_entropy": 0.8751961847929361, "calibration/buffer_distribution_entropy": 0.9316692924834962, "calibration/confidence_entropy": 0.4188681558773877, "calibration/coverage@0%": 0.0359375, "calibration/coverage@1%": 0.0359375, "calibration/coverage@10%": 0.43515625, "calibration/coverage@15%": 0.4859375, "calibration/coverage@20%": 0.53671875, "calibration/coverage@25%": 0.61484375, "calibration/coverage@30%": 0.72421875, "calibration/coverage@5%": 0.18515625, "calibration/ece": 0.14540309140625002, "calibration/mean_confidence": 0.64485612734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 931.6, "completions/max_terminated_length": 540.2, "completions/mean_length": 205.41650390625, "completions/mean_terminated_length": 205.0261260986328, "completions/min_length": 101.4, "completions/min_terminated_length": 101.4, "epoch": 0.976, "grad_norm": 0.0016158220823854208, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 1033816589.0, "reward": 1.0453666210174561, "reward_std": 0.06708120256662368, "rewards/accuracy_reward": 0.62216796875, "rewards/brier_reward": 0.8398854255676269, "rewards/confidence_uniqueness_reward": 0.9433197736740112, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0018908762140199542, "rewards/frontier_coverage_1": 0.11496728807687759, "rewards/frontier_coverage_10": 0.09931781068444252, "rewards/frontier_coverage_15": 0.07630908414721489, "rewards/frontier_coverage_20": 0.07998319193720818, "rewards/frontier_coverage_25": 0.1425451785326004, "rewards/frontier_coverage_5": 0.11488909721374511, "rewards/frontier_ece_reward": 0.0025672421557828783, "signal/accuracy_reward/centered_abs_mean": 0.084222412109375, "signal/accuracy_reward/group_std_mean": 0.11655709967017173, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0421112060546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0421112060546875, "signal/advantage_abs_mean": 0.048955275863409045, "signal/advantage_pre_scale_abs_mean": 0.048955275863409045, "signal/advantage_pre_scale_std": 0.10104106813669204, "signal/advantage_std": 0.10104106813669204, "signal/brier_reward/centered_abs_mean": 0.09183044731616974, "signal/brier_reward/group_std_mean": 0.12157261669635773, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011478805914521217, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011478805914521217, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02421579249203205, "signal/confidence_uniqueness_reward/group_std_mean": 0.03093937486410141, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030269740615040063, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030269740615040063, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015749115496873855, "signal/frontier_aurc_reward/group_std_mean": 0.0027885420713573694, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.819091714627575e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.819091714627575e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12192182391881942, "signal/frontier_coverage_1/group_std_mean": 0.15978844761848449, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002182400575838983, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002182400575838983, "signal/frontier_coverage_10/centered_abs_mean": 0.09427153617143631, "signal/frontier_coverage_10/group_std_mean": 0.12419438064098358, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0016874604858458041, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016874604858458041, "signal/frontier_coverage_15/centered_abs_mean": 0.06523038446903229, "signal/frontier_coverage_15/group_std_mean": 0.08563594371080399, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0011676238849759103, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011676238849759103, "signal/frontier_coverage_20/centered_abs_mean": 0.05165816843509674, "signal/frontier_coverage_20/group_std_mean": 0.06644249334931374, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009246811503544449, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009246811503544449, "signal/frontier_coverage_25/centered_abs_mean": 0.06867350712418556, "signal/frontier_coverage_25/group_std_mean": 0.09026172012090683, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012292557861655951, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012292557861655951, "signal/frontier_coverage_5/centered_abs_mean": 0.1216941773891449, "signal/frontier_coverage_5/group_std_mean": 0.15950067937374116, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021783256670460105, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021783256670460105, "signal/frontier_ece_reward/centered_abs_mean": 0.00236211777664721, "signal/frontier_ece_reward/group_std_mean": 0.003097822656854987, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00029526472208090125, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00029526472208090125, "step": 305 }, { "calibration/aurc": 0.27617015723730154, "calibration/batch_distribution_entropy": 0.8719944630571315, "calibration/buffer_distribution_entropy": 0.9307879294053564, "calibration/confidence_entropy": 0.3881166828523776, "calibration/coverage@0%": 0.15625, "calibration/coverage@1%": 0.18515625, "calibration/coverage@10%": 0.31796875, "calibration/coverage@15%": 0.36171875, "calibration/coverage@20%": 0.4203125, "calibration/coverage@25%": 0.48046875, "calibration/coverage@30%": 0.53515625, "calibration/coverage@5%": 0.27578125, "calibration/ece": 0.1480644396551724, "calibration/mean_confidence": 0.4999269396551724, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.4, "completions/max_terminated_length": 497.4, "completions/mean_length": 200.55478515625, "completions/mean_terminated_length": 200.55478515625, "completions/min_length": 91.2, "completions/min_terminated_length": 91.2, "epoch": 0.992, "grad_norm": 0.0018319895025342703, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 1050998750.0, "reward": 1.0178974866867065, "reward_std": 0.062009623646736144, "rewards/accuracy_reward": 0.571875, "rewards/brier_reward": 0.818060839176178, "rewards/confidence_uniqueness_reward": 0.9377853393554687, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0024078495102003218, "rewards/frontier_coverage_1": 0.13882496058940888, "rewards/frontier_coverage_10": 0.11529144048690795, "rewards/frontier_coverage_15": 0.08634113371372223, "rewards/frontier_coverage_20": 0.08047932088375091, "rewards/frontier_coverage_25": 0.1226568266749382, "rewards/frontier_coverage_5": 0.13870886862277984, "rewards/frontier_ece_reward": 0.0024728897726163266, "signal/accuracy_reward/centered_abs_mean": 0.08270263671875, "signal/accuracy_reward/group_std_mean": 0.10843254029750823, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041351318359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.041351318359375, "signal/advantage_abs_mean": 0.04691413417458534, "signal/advantage_pre_scale_abs_mean": 0.04691413417458534, "signal/advantage_pre_scale_std": 0.09592601060867309, "signal/advantage_std": 0.09592601060867309, "signal/brier_reward/centered_abs_mean": 0.0934365376830101, "signal/brier_reward/group_std_mean": 0.12053980976343155, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011679567210376263, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011679567210376263, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025814294815063477, "signal/confidence_uniqueness_reward/group_std_mean": 0.03233279511332512, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032267868518829346, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032267868518829346, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.001967202941887081, "signal/frontier_aurc_reward/group_std_mean": 0.0030709158163517714, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.521293183439411e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.521293183439411e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12599806636571884, "signal/frontier_coverage_1/group_std_mean": 0.16160787940025328, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022553652757778763, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022553652757778763, "signal/frontier_coverage_10/centered_abs_mean": 0.09844744727015495, "signal/frontier_coverage_10/group_std_mean": 0.12621570527553558, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001762209297157824, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001762209297157824, "signal/frontier_coverage_15/centered_abs_mean": 0.06843779757618904, "signal/frontier_coverage_15/group_std_mean": 0.0877251997590065, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012250364990904928, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012250364990904928, "signal/frontier_coverage_20/centered_abs_mean": 0.05275077372789383, "signal/frontier_coverage_20/group_std_mean": 0.06691490858793259, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009442388545721769, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009442388545721769, "signal/frontier_coverage_25/centered_abs_mean": 0.06476361751556396, "signal/frontier_coverage_25/group_std_mean": 0.08386294692754745, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011592687340453267, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011592687340453267, "signal/frontier_coverage_5/centered_abs_mean": 0.12582006603479384, "signal/frontier_coverage_5/group_std_mean": 0.16137229949235915, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022521790815517306, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022521790815517306, "signal/frontier_ece_reward/centered_abs_mean": 0.0023749925196170805, "signal/frontier_ece_reward/group_std_mean": 0.003116936841979623, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00029687406495213506, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00029687406495213506, "step": 310 }, { "calibration/aurc": 0.07394829778523786, "calibration/batch_distribution_entropy": 0.6858898086044589, "calibration/buffer_distribution_entropy": 0.9311619746607875, "calibration/confidence_entropy": 0.34007166230520547, "calibration/coverage@0%": 0.10546875, "calibration/coverage@1%": 0.10546875, "calibration/coverage@10%": 0.802734375, "calibration/coverage@15%": 0.943359375, "calibration/coverage@20%": 0.970703125, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5, "calibration/ece": 0.136876953125, "calibration/mean_confidence": 0.7878769531250001, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 386.5, "completions/max_terminated_length": 386.5, "completions/mean_length": 199.02162170410156, "completions/mean_terminated_length": 199.02162170410156, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.9984, "num_tokens": 1057815101.0, "reward": 1.050271451473236, "reward_std": 0.06441943719983101, "rewards/accuracy_reward": 0.645263671875, "rewards/brier_reward": 0.8100776672363281, "rewards/confidence_uniqueness_reward": 0.9436569213867188, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.001602545497007668, "rewards/frontier_coverage_1": 0.07218670099973679, "rewards/frontier_coverage_10": 0.0615706741809845, "rewards/frontier_coverage_15": 0.05613754317164421, "rewards/frontier_coverage_20": 0.06846107542514801, "rewards/frontier_coverage_25": 0.12920933216810226, "rewards/frontier_coverage_5": 0.07211882993578911, "rewards/frontier_ece_reward": 0.0017852028249762952, "signal/accuracy_reward/centered_abs_mean": 0.0796661376953125, "signal/accuracy_reward/group_std_mean": 0.11285967007279396, "signal/accuracy_reward/group_zero_std_frac": 0.6484375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03983306884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03983306884765625, "signal/advantage_abs_mean": 0.047435952350497246, "signal/advantage_pre_scale_abs_mean": 0.047435952350497246, "signal/advantage_pre_scale_std": 0.09802256524562836, "signal/advantage_std": 0.09802256524562836, "signal/brier_reward/centered_abs_mean": 0.10139483213424683, "signal/brier_reward/group_std_mean": 0.12895793095231056, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012674354016780853, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012674354016780853, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023676156997680664, "signal/confidence_uniqueness_reward/group_std_mean": 0.029014757834374905, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002959519624710083, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002959519624710083, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015349971363320947, "signal/frontier_aurc_reward/group_std_mean": 0.002599976258352399, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.747644975897856e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.747644975897856e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12439806759357452, "signal/frontier_coverage_1/group_std_mean": 0.16710513830184937, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002226725220680237, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002226725220680237, "signal/frontier_coverage_10/centered_abs_mean": 0.09704583883285522, "signal/frontier_coverage_10/group_std_mean": 0.13068146258592606, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017371204448863864, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017371204448863864, "signal/frontier_coverage_15/centered_abs_mean": 0.06808548793196678, "signal/frontier_coverage_15/group_std_mean": 0.0913914144039154, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001218730176333338, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001218730176333338, "signal/frontier_coverage_20/centered_abs_mean": 0.05189245194196701, "signal/frontier_coverage_20/group_std_mean": 0.06728483736515045, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0009288748260587454, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009288748260587454, "signal/frontier_coverage_25/centered_abs_mean": 0.06688933074474335, "signal/frontier_coverage_25/group_std_mean": 0.08471940457820892, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011973190703429282, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011973190703429282, "signal/frontier_coverage_5/centered_abs_mean": 0.12402944266796112, "signal/frontier_coverage_5/group_std_mean": 0.16661176830530167, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002220126916654408, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002220126916654408, "signal/frontier_ece_reward/centered_abs_mean": 0.002541982219554484, "signal/frontier_ece_reward/group_std_mean": 0.0034003107575699687, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003177477774443105, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003177477774443105, "step": 312, "total_flos": 0.0, "train_loss": 0.00456765069126656, "train_runtime": 31094.48, "train_samples_per_second": 0.643, "train_steps_per_second": 0.01 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1057815101, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }