{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5847039164885837, "calibration/batch_distribution_entropy": 0.6584268879399373, "calibration/confidence_entropy": 0.34608582603933763, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.45991905567284935, "calibration/mean_confidence": 0.7899003636588005, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03740234375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1504.2, "completions/mean_length": 272.08525390625, "completions/mean_terminated_length": 222.96121215820312, "completions/min_length": 1.8, "completions/min_terminated_length": 1.8, "epoch": 0.016, "grad_norm": 0.13381105661392212, "learning_rate": 3.1249999999999997e-07, "loss": 0.0938, "num_tokens": 17630185.0, "reward": 0.6633403062820434, "reward_std": 0.5005818009376526, "rewards/accuracy_reward": 0.26064453125, "rewards/brier_reward": 0.40211123824119566, "rewards/confidence_uniqueness_reward": 0.4817495226860046, "rewards/format_reward": 0.6783203125, "rewards/frontier_aurc_reward": 0.3331014633178711, "rewards/frontier_coverage_1": 0.3331014633178711, "rewards/frontier_coverage_10": 0.3331014633178711, "rewards/frontier_coverage_15": 0.3331014633178711, "rewards/frontier_coverage_20": 0.3331014633178711, "rewards/frontier_coverage_25": 0.3331014633178711, "rewards/frontier_coverage_5": 0.3331014633178711, "rewards/frontier_ece_reward": 0.3331014633178711, "signal/accuracy_reward/centered_abs_mean": 0.272442626953125, "signal/accuracy_reward/group_std_mean": 0.31336791515350343, "signal/accuracy_reward/group_zero_std_frac": 0.265625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1362213134765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1362213134765625, "signal/advantage_abs_mean": 0.4301945328712463, "signal/advantage_pre_scale_abs_mean": 0.4301945328712463, "signal/advantage_pre_scale_std": 0.5090484619140625, "signal/advantage_std": 0.5090484619140625, "signal/brier_reward/centered_abs_mean": 0.3353294968605042, "signal/brier_reward/group_std_mean": 0.3795027434825897, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04191618710756302, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04191618710756302, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.29859185218811035, "signal/confidence_uniqueness_reward/group_std_mean": 0.34845991134643556, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.037323981523513794, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.037323981523513794, "signal/format_reward/centered_abs_mean": 0.4048095703125, "signal/format_reward/group_std_mean": 0.4541024386882782, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.20240478515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.20240478515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.31411888003349303, "signal/frontier_aurc_reward/group_std_mean": 0.36313520073890687, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_1/centered_abs_mean": 0.31411888003349303, "signal/frontier_coverage_1/group_std_mean": 0.36313520073890687, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_10/centered_abs_mean": 0.31411888003349303, "signal/frontier_coverage_10/group_std_mean": 0.36313520073890687, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_15/centered_abs_mean": 0.31411888003349303, "signal/frontier_coverage_15/group_std_mean": 0.36313520073890687, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_20/centered_abs_mean": 0.31411888003349303, "signal/frontier_coverage_20/group_std_mean": 0.36313520073890687, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_25/centered_abs_mean": 0.31411888003349303, "signal/frontier_coverage_25/group_std_mean": 0.36313520073890687, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_5/centered_abs_mean": 0.31411888003349303, "signal/frontier_coverage_5/group_std_mean": 0.36313520073890687, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005622727982699871, "signal/frontier_ece_reward/centered_abs_mean": 0.31411888003349303, "signal/frontier_ece_reward/group_std_mean": 0.36313520073890687, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03926486000418663, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03926486000418663, "step": 5 }, { "calibration/aurc": 0.6457139978886784, "calibration/batch_distribution_entropy": 0.6441986464829549, "calibration/confidence_entropy": 0.34328724550232764, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5055546442235969, "calibration/mean_confidence": 0.794898848653782, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03447265625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1509.8, "completions/mean_length": 257.67353515625, "completions/mean_terminated_length": 212.0354034423828, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.0325239896774292, "learning_rate": 6.249999999999999e-07, "loss": 0.0951, "num_tokens": 35369114.0, "reward": 0.6889099597930908, "reward_std": 0.4666505217552185, "rewards/accuracy_reward": 0.2486328125, "rewards/brier_reward": 0.41154505014419557, "rewards/confidence_uniqueness_reward": 0.5230929255485535, "rewards/format_reward": 0.72841796875, "rewards/frontier_aurc_reward": 0.3338188171386719, "rewards/frontier_coverage_1": 0.3338188171386719, "rewards/frontier_coverage_10": 0.3338188171386719, "rewards/frontier_coverage_15": 0.3338188171386719, "rewards/frontier_coverage_20": 0.3338188171386719, "rewards/frontier_coverage_25": 0.3338188171386719, "rewards/frontier_coverage_5": 0.3338188171386719, "rewards/frontier_ece_reward": 0.3338188171386719, "signal/accuracy_reward/centered_abs_mean": 0.246484375, "signal/accuracy_reward/group_std_mean": 0.2941208004951477, "signal/accuracy_reward/group_zero_std_frac": 0.278125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1232421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1232421875, "signal/advantage_abs_mean": 0.38684009909629824, "signal/advantage_pre_scale_abs_mean": 0.38684009909629824, "signal/advantage_pre_scale_std": 0.47523062229156493, "signal/advantage_std": 0.47523062229156493, "signal/brier_reward/centered_abs_mean": 0.31702865958213805, "signal/brier_reward/group_std_mean": 0.3654074013233185, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.039628582447767256, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.039628582447767256, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.27955763339996337, "signal/confidence_uniqueness_reward/group_std_mean": 0.33765636682510375, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03494470417499542, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.03494470417499542, "signal/format_reward/centered_abs_mean": 0.367266845703125, "signal/format_reward/group_std_mean": 0.4314119637012482, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1836334228515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1836334228515625, "signal/frontier_aurc_reward/centered_abs_mean": 0.2960649013519287, "signal/frontier_aurc_reward/group_std_mean": 0.3490014672279358, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_1/centered_abs_mean": 0.2960649013519287, "signal/frontier_coverage_1/group_std_mean": 0.3490014672279358, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_10/centered_abs_mean": 0.2960649013519287, "signal/frontier_coverage_10/group_std_mean": 0.3490014672279358, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_15/centered_abs_mean": 0.2960649013519287, "signal/frontier_coverage_15/group_std_mean": 0.3490014672279358, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_20/centered_abs_mean": 0.2960649013519287, "signal/frontier_coverage_20/group_std_mean": 0.3490014672279358, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_25/centered_abs_mean": 0.2960649013519287, "signal/frontier_coverage_25/group_std_mean": 0.3490014672279358, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_5/centered_abs_mean": 0.2960649013519287, "signal/frontier_coverage_5/group_std_mean": 0.3490014672279358, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005299561750143766, "signal/frontier_ece_reward/centered_abs_mean": 0.2960649013519287, "signal/frontier_ece_reward/group_std_mean": 0.3490014672279358, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03700811266899109, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03700811266899109, "step": 10 }, { "calibration/aurc": 0.512972896770905, "calibration/batch_distribution_entropy": 0.6455565397380528, "calibration/buffer_distribution_entropy": 0.663353331603825, "calibration/confidence_entropy": 0.34031164024812754, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.1304147465437788, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3992726308201509, "calibration/mean_confidence": 0.7967083523664463, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01474609375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1469.2, "completions/mean_length": 195.08359375, "completions/mean_terminated_length": 175.09444580078124, "completions/min_length": 15.0, "completions/min_terminated_length": 15.0, "epoch": 0.048, "grad_norm": 0.014204099774360657, "learning_rate": 9.374999999999999e-07, "loss": 0.0581, "num_tokens": 52415506.0, "reward": 0.8369853854179382, "reward_std": 0.3447115957736969, "rewards/accuracy_reward": 0.32392578125, "rewards/brier_reward": 0.5263184905052185, "rewards/confidence_uniqueness_reward": 0.6511122345924377, "rewards/format_reward": 0.8958984375, "rewards/frontier_aurc_reward": 0.31629036981612446, "rewards/frontier_coverage_1": 0.3337858706712723, "rewards/frontier_coverage_10": 0.3337858706712723, "rewards/frontier_coverage_15": 0.3337858706712723, "rewards/frontier_coverage_20": 0.3337858706712723, "rewards/frontier_coverage_25": 0.3337858706712723, "rewards/frontier_coverage_5": 0.3337858706712723, "rewards/frontier_ece_reward": 0.30707414969801905, "signal/accuracy_reward/centered_abs_mean": 0.215557861328125, "signal/accuracy_reward/group_std_mean": 0.2651854813098907, "signal/accuracy_reward/group_zero_std_frac": 0.321875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1077789306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1077789306640625, "signal/advantage_abs_mean": 0.2599746108055115, "signal/advantage_pre_scale_abs_mean": 0.2599746108055115, "signal/advantage_pre_scale_std": 0.3583178609609604, "signal/advantage_std": 0.3583178609609604, "signal/brier_reward/centered_abs_mean": 0.27192609906196596, "signal/brier_reward/group_std_mean": 0.32940946221351625, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.033990762382745746, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.033990762382745746, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19261950254440308, "signal/confidence_uniqueness_reward/group_std_mean": 0.2545264959335327, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024077437818050385, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.024077437818050385, "signal/format_reward/centered_abs_mean": 0.17540283203125, "signal/format_reward/group_std_mean": 0.27413243651390073, "signal/format_reward/group_zero_std_frac": 0.109375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.087701416015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.087701416015625, "signal/frontier_aurc_reward/centered_abs_mean": 0.21203429326415063, "signal/frontier_aurc_reward/group_std_mean": 0.25519408844411373, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003795413678744808, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003795413678744808, "signal/frontier_coverage_1/centered_abs_mean": 0.2363867074251175, "signal/frontier_coverage_1/group_std_mean": 0.2943639099597931, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_10/centered_abs_mean": 0.2363867074251175, "signal/frontier_coverage_10/group_std_mean": 0.2943639099597931, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_15/centered_abs_mean": 0.2363867074251175, "signal/frontier_coverage_15/group_std_mean": 0.2943639099597931, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_20/centered_abs_mean": 0.2363867074251175, "signal/frontier_coverage_20/group_std_mean": 0.2943639099597931, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_25/centered_abs_mean": 0.2363867074251175, "signal/frontier_coverage_25/group_std_mean": 0.2943639099597931, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_5/centered_abs_mean": 0.2363867074251175, "signal/frontier_coverage_5/group_std_mean": 0.2943639099597931, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0042313218116760256, "signal/frontier_ece_reward/centered_abs_mean": 0.23511168360710144, "signal/frontier_ece_reward/group_std_mean": 0.2841564893722534, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.02938896045088768, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.02938896045088768, "step": 15 }, { "calibration/aurc": 0.4888797539741285, "calibration/batch_distribution_entropy": 0.6813886801759734, "calibration/buffer_distribution_entropy": 0.6586534364395229, "calibration/confidence_entropy": 0.37032147254666903, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3500033465127651, "calibration/mean_confidence": 0.7916001832678258, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00244140625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1051.0, "completions/mean_length": 139.14345703125, "completions/mean_terminated_length": 135.73062591552736, "completions/min_length": 24.2, "completions/min_terminated_length": 24.2, "epoch": 0.064, "grad_norm": 0.0029658779967576265, "learning_rate": 1e-06, "loss": 0.0072, "num_tokens": 68758735.0, "reward": 0.8559809684753418, "reward_std": 0.21398123800754548, "rewards/accuracy_reward": 0.39052734375, "rewards/brier_reward": 0.5993266940116883, "rewards/confidence_uniqueness_reward": 0.7415877938270569, "rewards/format_reward": 0.98271484375, "rewards/frontier_aurc_reward": -0.006243877112865448, "rewards/frontier_coverage_1": 0.051288098096847534, "rewards/frontier_coverage_10": 0.051288098096847534, "rewards/frontier_coverage_15": 0.051288098096847534, "rewards/frontier_coverage_20": 0.051288098096847534, "rewards/frontier_coverage_25": 0.051288098096847534, "rewards/frontier_coverage_5": 0.051288098096847534, "rewards/frontier_ece_reward": -0.029207990132272245, "signal/accuracy_reward/centered_abs_mean": 0.215142822265625, "signal/accuracy_reward/group_std_mean": 0.2683002293109894, "signal/accuracy_reward/group_zero_std_frac": 0.303125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1075714111328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1075714111328125, "signal/advantage_abs_mean": 0.16140898168087006, "signal/advantage_pre_scale_abs_mean": 0.16140898168087006, "signal/advantage_pre_scale_std": 0.23029825389385222, "signal/advantage_std": 0.23029825389385222, "signal/brier_reward/centered_abs_mean": 0.2396304726600647, "signal/brier_reward/group_std_mean": 0.2977387011051178, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02995380908250809, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02995380908250809, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12114065438508988, "signal/confidence_uniqueness_reward/group_std_mean": 0.1570127099752426, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015142581798136235, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.015142581798136235, "signal/format_reward/centered_abs_mean": 0.032794189453125, "signal/format_reward/group_std_mean": 0.08168496713042259, "signal/format_reward/group_zero_std_frac": 0.590625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0163970947265625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0163970947265625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0057399141602218155, "signal/frontier_aurc_reward/group_std_mean": 0.008327136002480983, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.0001027444624924101, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.0001027444624924101, "signal/frontier_coverage_1/centered_abs_mean": 0.09880194365978241, "signal/frontier_coverage_1/group_std_mean": 0.15808388888835906, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_10/centered_abs_mean": 0.09880194365978241, "signal/frontier_coverage_10/group_std_mean": 0.15808388888835906, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_15/centered_abs_mean": 0.09880194365978241, "signal/frontier_coverage_15/group_std_mean": 0.15808388888835906, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_20/centered_abs_mean": 0.09880194365978241, "signal/frontier_coverage_20/group_std_mean": 0.15808388888835906, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_25/centered_abs_mean": 0.09880194365978241, "signal/frontier_coverage_25/group_std_mean": 0.15808388888835906, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_5/centered_abs_mean": 0.09880194365978241, "signal/frontier_coverage_5/group_std_mean": 0.15808388888835906, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001768554700538516, "signal/frontier_ece_reward/centered_abs_mean": 0.10608904957771301, "signal/frontier_ece_reward/group_std_mean": 0.13063843846321105, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.013261131197214126, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.013261131197214126, "step": 20 }, { "calibration/aurc": 0.5833397552619999, "calibration/batch_distribution_entropy": 0.7711499566613911, "calibration/buffer_distribution_entropy": 0.6807855611919311, "calibration/confidence_entropy": 0.4415601518335488, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.39055978154292853, "calibration/mean_confidence": 0.7369087639275899, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1312.4, "completions/max_terminated_length": 663.8, "completions/mean_length": 118.03896484375, "completions/mean_terminated_length": 117.34614868164063, "completions/min_length": 36.4, "completions/min_terminated_length": 36.4, "epoch": 0.08, "grad_norm": 0.004985923878848553, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 84900606.0, "reward": 0.8875869035720825, "reward_std": 0.1687161237001419, "rewards/accuracy_reward": 0.41005859375, "rewards/brier_reward": 0.6432444810867309, "rewards/confidence_uniqueness_reward": 0.8054814100265503, "rewards/format_reward": 0.99697265625, "rewards/frontier_aurc_reward": -0.005297265853732825, "rewards/frontier_coverage_1": 0.05199873372912407, "rewards/frontier_coverage_10": 0.05199873372912407, "rewards/frontier_coverage_15": 0.05199873372912407, "rewards/frontier_coverage_20": 0.05199873372912407, "rewards/frontier_coverage_25": 0.05199873372912407, "rewards/frontier_coverage_5": 0.05199873372912407, "rewards/frontier_ece_reward": -0.02007437888532877, "signal/accuracy_reward/centered_abs_mean": 0.188824462890625, "signal/accuracy_reward/group_std_mean": 0.239495387673378, "signal/accuracy_reward/group_zero_std_frac": 0.35625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0944122314453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0944122314453125, "signal/advantage_abs_mean": 0.13153642565011978, "signal/advantage_pre_scale_abs_mean": 0.13153642565011978, "signal/advantage_pre_scale_std": 0.19022858738899232, "signal/advantage_std": 0.19022858738899232, "signal/brier_reward/centered_abs_mean": 0.20990893840789795, "signal/brier_reward/group_std_mean": 0.26221993565559387, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026238617300987244, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.026238617300987244, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07374545335769653, "signal/confidence_uniqueness_reward/group_std_mean": 0.09642878472805023, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009218181669712066, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009218181669712066, "signal/format_reward/centered_abs_mean": 0.005865478515625, "signal/format_reward/group_std_mean": 0.017125242576003074, "signal/format_reward/group_zero_std_frac": 0.903125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0029327392578125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0029327392578125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0036071423441171647, "signal/frontier_aurc_reward/group_std_mean": 0.005228751804679632, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.456784321926535e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.456784321926535e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1151951402425766, "signal/frontier_coverage_1/group_std_mean": 0.1752179741859436, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_10/centered_abs_mean": 0.1151951402425766, "signal/frontier_coverage_10/group_std_mean": 0.1752179741859436, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_15/centered_abs_mean": 0.1151951402425766, "signal/frontier_coverage_15/group_std_mean": 0.1752179741859436, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_20/centered_abs_mean": 0.1151951402425766, "signal/frontier_coverage_20/group_std_mean": 0.1752179741859436, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_25/centered_abs_mean": 0.1151951402425766, "signal/frontier_coverage_25/group_std_mean": 0.1752179741859436, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_5/centered_abs_mean": 0.1151951402425766, "signal/frontier_coverage_5/group_std_mean": 0.1752179741859436, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020619928138330577, "signal/frontier_ece_reward/centered_abs_mean": 0.09093453586101533, "signal/frontier_ece_reward/group_std_mean": 0.11268945634365082, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.011366816982626916, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.011366816982626916, "step": 25 }, { "calibration/aurc": 0.5097055641069405, "calibration/batch_distribution_entropy": 0.8475244813528956, "calibration/buffer_distribution_entropy": 0.7181974411858837, "calibration/confidence_entropy": 0.5181217659331916, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.2519970886498832, "calibration/mean_confidence": 0.6561152287939566, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1300.0, "completions/max_terminated_length": 686.4, "completions/mean_length": 118.8400390625, "completions/mean_terminated_length": 118.28626098632813, "completions/min_length": 39.0, "completions/min_terminated_length": 39.0, "epoch": 0.096, "grad_norm": 0.0021375820506364107, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 101162136.0, "reward": 0.9285231471061707, "reward_std": 0.15589244663715363, "rewards/accuracy_reward": 0.46591796875, "rewards/brier_reward": 0.7028721451759339, "rewards/confidence_uniqueness_reward": 0.8286531448364258, "rewards/format_reward": 0.99765625, "rewards/frontier_aurc_reward": -0.004147473024204373, "rewards/frontier_coverage_1": 0.0462727814912796, "rewards/frontier_coverage_10": 0.0462727814912796, "rewards/frontier_coverage_15": 0.0462727814912796, "rewards/frontier_coverage_20": 0.0462727814912796, "rewards/frontier_coverage_25": 0.0462727814912796, "rewards/frontier_coverage_5": 0.0462727814912796, "rewards/frontier_ece_reward": 0.0031993848038837313, "signal/accuracy_reward/centered_abs_mean": 0.178704833984375, "signal/accuracy_reward/group_std_mean": 0.23412654399871827, "signal/accuracy_reward/group_zero_std_frac": 0.3375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0893524169921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0893524169921875, "signal/advantage_abs_mean": 0.1198556289076805, "signal/advantage_pre_scale_abs_mean": 0.1198556289076805, "signal/advantage_pre_scale_std": 0.1744433581829071, "signal/advantage_std": 0.1744433581829071, "signal/brier_reward/centered_abs_mean": 0.1862693428993225, "signal/brier_reward/group_std_mean": 0.23494411408901214, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023283667862415314, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023283667862415314, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07551151067018509, "signal/confidence_uniqueness_reward/group_std_mean": 0.09503946453332901, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009438938833773136, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009438938833773136, "signal/format_reward/centered_abs_mean": 0.0045166015625, "signal/format_reward/group_std_mean": 0.012585635110735894, "signal/format_reward/group_zero_std_frac": 0.93125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00225830078125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00225830078125, "signal/frontier_aurc_reward/centered_abs_mean": 0.00249544708058238, "signal/frontier_aurc_reward/group_std_mean": 0.003836318291723728, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4668500049738216e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4668500049738216e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14569330513477324, "signal/frontier_coverage_1/group_std_mean": 0.2063480108976364, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_10/centered_abs_mean": 0.14569330513477324, "signal/frontier_coverage_10/group_std_mean": 0.2063480108976364, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_15/centered_abs_mean": 0.14569330513477324, "signal/frontier_coverage_15/group_std_mean": 0.2063480108976364, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_20/centered_abs_mean": 0.14569330513477324, "signal/frontier_coverage_20/group_std_mean": 0.2063480108976364, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_25/centered_abs_mean": 0.14569330513477324, "signal/frontier_coverage_25/group_std_mean": 0.2063480108976364, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_5/centered_abs_mean": 0.14569330513477324, "signal/frontier_coverage_5/group_std_mean": 0.2063480108976364, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026079101487994196, "signal/frontier_ece_reward/centered_abs_mean": 0.07989477664232254, "signal/frontier_ece_reward/group_std_mean": 0.09803950935602188, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009986847080290318, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009986847080290318, "step": 30 }, { "calibration/aurc": 0.4034232041511837, "calibration/batch_distribution_entropy": 0.8755003538996025, "calibration/buffer_distribution_entropy": 0.7633529821095395, "calibration/confidence_entropy": 0.5421376521803977, "calibration/coverage@0%": 0.0015625, "calibration/coverage@1%": 0.0015625, "calibration/coverage@10%": 0.0015625, "calibration/coverage@15%": 0.0015625, "calibration/coverage@20%": 0.0015625, "calibration/coverage@25%": 0.013701711849123211, "calibration/coverage@30%": 0.09620627146406507, "calibration/coverage@5%": 0.0015625, "calibration/ece": 0.1530395509526774, "calibration/mean_confidence": 0.6019227630383341, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1087.2, "completions/max_terminated_length": 611.8, "completions/mean_length": 127.91103515625, "completions/mean_terminated_length": 127.49808654785156, "completions/min_length": 45.4, "completions/min_terminated_length": 45.4, "epoch": 0.112, "grad_norm": 0.0021089757792651653, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 117581417.0, "reward": 0.9377779960632324, "reward_std": 0.14431948363780975, "rewards/accuracy_reward": 0.46689453125, "rewards/brier_reward": 0.7333363890647888, "rewards/confidence_uniqueness_reward": 0.8407991886138916, "rewards/format_reward": 0.9982421875, "rewards/frontier_aurc_reward": -0.003713348833844066, "rewards/frontier_coverage_1": 0.06892486587166786, "rewards/frontier_coverage_10": 0.06892486587166786, "rewards/frontier_coverage_15": 0.06892486587166786, "rewards/frontier_coverage_20": 0.06892486587166786, "rewards/frontier_coverage_25": 0.06892486587166786, "rewards/frontier_coverage_5": 0.06892486587166786, "rewards/frontier_ece_reward": 0.008853092475328594, "signal/accuracy_reward/centered_abs_mean": 0.174737548828125, "signal/accuracy_reward/group_std_mean": 0.22908719778060913, "signal/accuracy_reward/group_zero_std_frac": 0.35625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0873687744140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0873687744140625, "signal/advantage_abs_mean": 0.11145349889993668, "signal/advantage_pre_scale_abs_mean": 0.11145349889993668, "signal/advantage_pre_scale_std": 0.16115358769893645, "signal/advantage_std": 0.16115358769893645, "signal/brier_reward/centered_abs_mean": 0.17493112981319428, "signal/brier_reward/group_std_mean": 0.22111110389232635, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021866391226649285, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021866391226649285, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07850788980722427, "signal/confidence_uniqueness_reward/group_std_mean": 0.09901682883501053, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009813486225903034, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009813486225903034, "signal/format_reward/centered_abs_mean": 0.00340576171875, "signal/format_reward/group_std_mean": 0.009943688940256833, "signal/format_reward/group_zero_std_frac": 0.94375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001702880859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001702880859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020899008959531784, "signal/frontier_aurc_reward/group_std_mean": 0.003328893566504121, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.740922475117259e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.740922475117259e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18038916885852813, "signal/frontier_coverage_1/group_std_mean": 0.24032150208950043, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_10/centered_abs_mean": 0.18038916885852813, "signal/frontier_coverage_10/group_std_mean": 0.24032150208950043, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_15/centered_abs_mean": 0.18038916885852813, "signal/frontier_coverage_15/group_std_mean": 0.24032150208950043, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_20/centered_abs_mean": 0.18038916885852813, "signal/frontier_coverage_20/group_std_mean": 0.24032150208950043, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_25/centered_abs_mean": 0.18038916885852813, "signal/frontier_coverage_25/group_std_mean": 0.24032150208950043, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_5/centered_abs_mean": 0.18038916885852813, "signal/frontier_coverage_5/group_std_mean": 0.24032150208950043, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032289660535752772, "signal/frontier_ece_reward/centered_abs_mean": 0.06583447903394699, "signal/frontier_ece_reward/group_std_mean": 0.08168520033359528, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008229309879243373, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008229309879243373, "step": 35 }, { "calibration/aurc": 0.4206024418243185, "calibration/batch_distribution_entropy": 0.8995750737642869, "calibration/buffer_distribution_entropy": 0.8043121453818675, "calibration/confidence_entropy": 0.569409384218363, "calibration/coverage@0%": 0.0015655577299412914, "calibration/coverage@1%": 0.0015655577299412914, "calibration/coverage@10%": 0.0015655577299412914, "calibration/coverage@15%": 0.021112860812133073, "calibration/coverage@20%": 0.048890808463796474, "calibration/coverage@25%": 0.05787518346379648, "calibration/coverage@30%": 0.09778085249510762, "calibration/coverage@5%": 0.0015655577299412914, "calibration/ece": 0.12978587211412798, "calibration/mean_confidence": 0.519045968630486, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 865.2, "completions/max_terminated_length": 428.0, "completions/mean_length": 143.0123046875, "completions/mean_terminated_length": 142.74053344726562, "completions/min_length": 53.0, "completions/min_terminated_length": 53.0, "epoch": 0.128, "grad_norm": 0.0014828367857262492, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 133962535.0, "reward": 0.9425314426422119, "reward_std": 0.12577675879001618, "rewards/accuracy_reward": 0.46201171875, "rewards/brier_reward": 0.7536422848701477, "rewards/confidence_uniqueness_reward": 0.858219563961029, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.0033836792223155498, "rewards/frontier_coverage_1": 0.0849621519446373, "rewards/frontier_coverage_10": 0.0849621519446373, "rewards/frontier_coverage_15": 0.0849621519446373, "rewards/frontier_coverage_20": 0.0849621519446373, "rewards/frontier_coverage_25": 0.0849621519446373, "rewards/frontier_coverage_5": 0.0849621519446373, "rewards/frontier_ece_reward": 0.01173410825431347, "signal/accuracy_reward/centered_abs_mean": 0.153033447265625, "signal/accuracy_reward/group_std_mean": 0.20268645882606506, "signal/accuracy_reward/group_zero_std_frac": 0.421875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0765167236328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0765167236328125, "signal/advantage_abs_mean": 0.09732886403799057, "signal/advantage_pre_scale_abs_mean": 0.09732886403799057, "signal/advantage_pre_scale_std": 0.14202140867710114, "signal/advantage_std": 0.14202140867710114, "signal/brier_reward/centered_abs_mean": 0.16542658805847169, "signal/brier_reward/group_std_mean": 0.20726902186870574, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02067832350730896, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02067832350730896, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06928935050964355, "signal/confidence_uniqueness_reward/group_std_mean": 0.0870005339384079, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008661168813705444, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008661168813705444, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_std_mean": 0.005187963135540485, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.001608482375741005, "signal/frontier_aurc_reward/group_std_mean": 0.0025744295679032804, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8791834483854473e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8791834483854473e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20140134692192077, "signal/frontier_coverage_1/group_std_mean": 0.2594201147556305, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_10/centered_abs_mean": 0.20140134692192077, "signal/frontier_coverage_10/group_std_mean": 0.2594201147556305, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_15/centered_abs_mean": 0.20140134692192077, "signal/frontier_coverage_15/group_std_mean": 0.2594201147556305, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_20/centered_abs_mean": 0.20140134692192077, "signal/frontier_coverage_20/group_std_mean": 0.2594201147556305, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_25/centered_abs_mean": 0.20140134692192077, "signal/frontier_coverage_25/group_std_mean": 0.2594201147556305, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_5/centered_abs_mean": 0.20140134692192077, "signal/frontier_coverage_5/group_std_mean": 0.2594201147556305, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036050839349627494, "signal/frontier_ece_reward/centered_abs_mean": 0.05098764970898628, "signal/frontier_ece_reward/group_std_mean": 0.06506675034761429, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006373456213623285, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006373456213623285, "step": 40 }, { "calibration/aurc": 0.25230664071765474, "calibration/batch_distribution_entropy": 0.9275190996526831, "calibration/buffer_distribution_entropy": 0.8422015589527397, "calibration/confidence_entropy": 0.5486066737565748, "calibration/coverage@0%": 0.006254586594911937, "calibration/coverage@1%": 0.006254586594911937, "calibration/coverage@10%": 0.18946076932485323, "calibration/coverage@15%": 0.2558914811643836, "calibration/coverage@20%": 0.35945985200587083, "calibration/coverage@25%": 0.5741682974559686, "calibration/coverage@30%": 0.6563600782778864, "calibration/coverage@5%": 0.03164521159491194, "calibration/ece": 0.1880487292583986, "calibration/mean_confidence": 0.4893802243864155, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 631.6, "completions/max_terminated_length": 453.2, "completions/mean_length": 151.640625, "completions/mean_terminated_length": 151.50575561523436, "completions/min_length": 57.0, "completions/min_terminated_length": 57.0, "epoch": 0.144, "grad_norm": 0.0015879464335739613, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 150465767.0, "reward": 0.995142936706543, "reward_std": 0.11998683214187622, "rewards/accuracy_reward": 0.56689453125, "rewards/brier_reward": 0.7644174814224243, "rewards/confidence_uniqueness_reward": 0.8742515563964843, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.002688299072906375, "rewards/frontier_coverage_1": 0.04131883792579174, "rewards/frontier_coverage_10": 0.04131883792579174, "rewards/frontier_coverage_15": 0.04131883792579174, "rewards/frontier_coverage_20": 0.04131883792579174, "rewards/frontier_coverage_25": 0.04131883792579174, "rewards/frontier_coverage_5": 0.04131883792579174, "rewards/frontier_ece_reward": 0.022904913872480392, "signal/accuracy_reward/centered_abs_mean": 0.157574462890625, "signal/accuracy_reward/group_std_mean": 0.20815051794052125, "signal/accuracy_reward/group_zero_std_frac": 0.403125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0787872314453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0787872314453125, "signal/advantage_abs_mean": 0.09277628511190414, "signal/advantage_pre_scale_abs_mean": 0.09277628511190414, "signal/advantage_pre_scale_std": 0.13499897867441177, "signal/advantage_std": 0.13499897867441177, "signal/brier_reward/centered_abs_mean": 0.16650620400905608, "signal/brier_reward/group_std_mean": 0.2074718177318573, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02081327550113201, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02081327550113201, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05890683159232139, "signal/confidence_uniqueness_reward/group_std_mean": 0.07246142774820327, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007363353949040174, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007363353949040174, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014498829375952482, "signal/frontier_aurc_reward/group_std_mean": 0.0022392344195395707, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5952903524739667e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5952903524739667e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2221655696630478, "signal/frontier_coverage_1/group_std_mean": 0.28164334297180177, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_10/centered_abs_mean": 0.2221655696630478, "signal/frontier_coverage_10/group_std_mean": 0.28164334297180177, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_15/centered_abs_mean": 0.2221655696630478, "signal/frontier_coverage_15/group_std_mean": 0.28164334297180177, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_20/centered_abs_mean": 0.2221655696630478, "signal/frontier_coverage_20/group_std_mean": 0.28164334297180177, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_25/centered_abs_mean": 0.2221655696630478, "signal/frontier_coverage_25/group_std_mean": 0.28164334297180177, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_5/centered_abs_mean": 0.2221655696630478, "signal/frontier_coverage_5/group_std_mean": 0.28164334297180177, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003976763784885406, "signal/frontier_ece_reward/centered_abs_mean": 0.042747367173433304, "signal/frontier_ece_reward/group_std_mean": 0.05639359876513481, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005343420896679163, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005343420896679163, "step": 45 }, { "calibration/aurc": 0.33645536213345384, "calibration/batch_distribution_entropy": 0.9596539888214259, "calibration/buffer_distribution_entropy": 0.8735012467520804, "calibration/confidence_entropy": 0.5221218785246823, "calibration/coverage@0%": 0.0011741682974559687, "calibration/coverage@1%": 0.0011741682974559687, "calibration/coverage@10%": 0.016408543297455967, "calibration/coverage@15%": 0.0781716303816047, "calibration/coverage@20%": 0.14271725171232877, "calibration/coverage@25%": 0.28699471012720157, "calibration/coverage@30%": 0.4003959760273973, "calibration/coverage@5%": 0.0011741682974559687, "calibration/ece": 0.09525141433601002, "calibration/mean_confidence": 0.4579662869305697, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 726.2, "completions/max_terminated_length": 499.4, "completions/mean_length": 159.56552734375, "completions/mean_terminated_length": 159.43108520507812, "completions/min_length": 66.6, "completions/min_terminated_length": 66.6, "epoch": 0.16, "grad_norm": 0.0011594152310863137, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 167120646.0, "reward": 0.9787898898124695, "reward_std": 0.11392782926559449, "rewards/accuracy_reward": 0.5169921875, "rewards/brier_reward": 0.7787827849388123, "rewards/confidence_uniqueness_reward": 0.8814345479011536, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002690990408882499, "rewards/frontier_coverage_1": 0.09653475433588028, "rewards/frontier_coverage_10": 0.09653475433588028, "rewards/frontier_coverage_15": 0.09653475433588028, "rewards/frontier_coverage_20": 0.09653475433588028, "rewards/frontier_coverage_25": 0.09653475433588028, "rewards/frontier_coverage_5": 0.09653475433588028, "rewards/frontier_ece_reward": 0.021138164028525353, "signal/accuracy_reward/centered_abs_mean": 0.14764404296875, "signal/accuracy_reward/group_std_mean": 0.19262417852878572, "signal/accuracy_reward/group_zero_std_frac": 0.45625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.073822021484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.073822021484375, "signal/advantage_abs_mean": 0.08894423246383668, "signal/advantage_pre_scale_abs_mean": 0.08894423246383668, "signal/advantage_pre_scale_std": 0.13065763264894487, "signal/advantage_std": 0.13065763264894487, "signal/brier_reward/centered_abs_mean": 0.16479850709438323, "signal/brier_reward/group_std_mean": 0.20878478586673738, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020599813386797904, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020599813386797904, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05358843132853508, "signal/confidence_uniqueness_reward/group_std_mean": 0.06951197981834412, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006698553916066885, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006698553916066885, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0016239010030403734, "signal/frontier_aurc_reward/group_std_mean": 0.0026413511484861376, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.9067828654660845e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.9067828654660845e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2256328582763672, "signal/frontier_coverage_1/group_std_mean": 0.2876005291938782, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_10/centered_abs_mean": 0.2256328582763672, "signal/frontier_coverage_10/group_std_mean": 0.2876005291938782, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_15/centered_abs_mean": 0.2256328582763672, "signal/frontier_coverage_15/group_std_mean": 0.2876005291938782, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_20/centered_abs_mean": 0.2256328582763672, "signal/frontier_coverage_20/group_std_mean": 0.2876005291938782, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_25/centered_abs_mean": 0.2256328582763672, "signal/frontier_coverage_25/group_std_mean": 0.2876005291938782, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_5/centered_abs_mean": 0.2256328582763672, "signal/frontier_coverage_5/group_std_mean": 0.2876005291938782, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004038827959448099, "signal/frontier_ece_reward/centered_abs_mean": 0.03931342288851738, "signal/frontier_ece_reward/group_std_mean": 0.05260321199893951, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004914177861064672, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004914177861064672, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.6288164644200097, "eval_calibration/batch_distribution_entropy": 0.8989468045052211, "eval_calibration/buffer_distribution_entropy": 0.8895391515718143, "eval_calibration/confidence_entropy": 0.4647170323267861, "eval_calibration/coverage@0%": 0.0078125, "eval_calibration/coverage@1%": 0.0078125, "eval_calibration/coverage@10%": 0.0078125, "eval_calibration/coverage@15%": 0.0078125, "eval_calibration/coverage@20%": 0.0078125, "eval_calibration/coverage@25%": 0.0078125, "eval_calibration/coverage@30%": 0.0078125, "eval_calibration/coverage@5%": 0.0078125, "eval_calibration/ece": 0.25625, "eval_calibration/mean_confidence": 0.47421874999999997, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 320.25, "eval_completions/max_terminated_length": 320.25, "eval_completions/mean_length": 162.92638778686523, "eval_completions/mean_terminated_length": 162.92638778686523, "eval_completions/min_length": 84.0, "eval_completions/min_terminated_length": 84.0, "eval_loss": 0.0, "eval_num_tokens": 167120646.0, "eval_reward": 0.8989088386297226, "eval_reward_std": 0.22175507247447968, "eval_rewards/accuracy_reward": 0.36328125, "eval_rewards/brier_reward": 0.7475792616605759, "eval_rewards/confidence_uniqueness_reward": 0.8271484375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004181863856501877, "eval_rewards/frontier_coverage_1": 0.1855441853404045, "eval_rewards/frontier_coverage_10": 0.1855441853404045, "eval_rewards/frontier_coverage_15": 0.1855441853404045, "eval_rewards/frontier_coverage_20": 0.1855441853404045, "eval_rewards/frontier_coverage_25": 0.1855441853404045, "eval_rewards/frontier_coverage_5": 0.1855441853404045, "eval_rewards/frontier_ece_reward": 0.004597170656779781, "eval_runtime": 18.591, "eval_samples_per_second": 26.895, "eval_signal/accuracy_reward/centered_abs_mean": 0.448486328125, "eval_signal/accuracy_reward/group_std_mean": 0.48031486570835114, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2242431640625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2242431640625, "eval_signal/advantage_abs_mean": 0.19596171379089355, "eval_signal/advantage_pre_scale_abs_mean": 0.19596171379089355, "eval_signal/advantage_pre_scale_std": 0.21975785121321678, "eval_signal/advantage_std": 0.21975785121321678, "eval_signal/brier_reward/centered_abs_mean": 0.22779354453086853, "eval_signal/brier_reward/group_std_mean": 0.2757691219449043, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028474193066358566, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.028474193066358566, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0811309814453125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.10011672414839268, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010141372680664062, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010141372680664062, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0033902853610925376, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0057787023251876235, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.068610764486948e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.068610764486948e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3594288006424904, "eval_signal/frontier_coverage_1/group_std_mean": 0.4431358575820923, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3594288006424904, "eval_signal/frontier_coverage_10/group_std_mean": 0.4431358575820923, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3594288006424904, "eval_signal/frontier_coverage_15/group_std_mean": 0.4431358575820923, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3594288006424904, "eval_signal/frontier_coverage_20/group_std_mean": 0.4431358575820923, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3594288006424904, "eval_signal/frontier_coverage_25/group_std_mean": 0.4431358575820923, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3594288006424904, "eval_signal/frontier_coverage_5/group_std_mean": 0.4431358575820923, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00643377541564405, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.04951605014503002, "eval_signal/frontier_ece_reward/group_std_mean": 0.07548648118972778, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006189506268128753, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006189506268128753, "eval_steps_per_second": 0.215, "step": 50 }, { "epoch": 0.16, "step": 50, "train_probe_calibration/aurc": 0.3141581882630031, "train_probe_calibration/batch_distribution_entropy": 0.936476757190635, "train_probe_calibration/buffer_distribution_entropy": 0.8907348399167692, "train_probe_calibration/confidence_entropy": 0.49250123077527586, "train_probe_calibration/coverage@0%": 0.0859375, "train_probe_calibration/coverage@1%": 0.0859375, "train_probe_calibration/coverage@10%": 0.140625, "train_probe_calibration/coverage@15%": 0.1640625, "train_probe_calibration/coverage@20%": 0.2421875, "train_probe_calibration/coverage@25%": 0.3671875, "train_probe_calibration/coverage@30%": 0.4921875, "train_probe_calibration/coverage@5%": 0.0859375, "train_probe_calibration/ece": 0.1928515625, "train_probe_calibration/mean_confidence": 0.4678515625, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 302.0, "train_probe_completions/max_terminated_length": 302.0, "train_probe_completions/mean_length": 165.1945037841797, "train_probe_completions/mean_terminated_length": 165.1945037841797, "train_probe_completions/min_length": 85.75, "train_probe_completions/min_terminated_length": 85.75, "train_probe_loss": 0.0, "train_probe_num_tokens": 167120646.0, "train_probe_reward": 0.981845498085022, "train_probe_reward_std": 0.225032739341259, "train_probe_rewards/accuracy_reward": 0.533203125, "train_probe_rewards/brier_reward": 0.7775600701570511, "train_probe_rewards/confidence_uniqueness_reward": 0.83984375, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0024623668286949396, "train_probe_rewards/frontier_coverage_1": 0.0937139643356204, "train_probe_rewards/frontier_coverage_10": 0.0937139643356204, "train_probe_rewards/frontier_coverage_15": 0.0937139643356204, "train_probe_rewards/frontier_coverage_20": 0.0937139643356204, "train_probe_rewards/frontier_coverage_25": 0.0937139643356204, "train_probe_rewards/frontier_coverage_5": 0.0937139643356204, "train_probe_rewards/frontier_ece_reward": 0.02438117517158389, "train_probe_runtime": 16.9521, "train_probe_samples_per_second": 29.495, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4891357421875, "train_probe_signal/accuracy_reward/group_std_mean": 0.5024252682924271, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.24456787109375, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.24456787109375, "train_probe_signal/advantage_abs_mean": 0.20956310257315636, "train_probe_signal/advantage_pre_scale_abs_mean": 0.20956310257315636, "train_probe_signal/advantage_pre_scale_std": 0.22249595075845718, "train_probe_signal/advantage_std": 0.22249595075845718, "train_probe_signal/brier_reward/centered_abs_mean": 0.1993359997868538, "train_probe_signal/brier_reward/group_std_mean": 0.24190250411629677, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024916999973356724, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.024916999973356724, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.072021484375, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.08701512962579727, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009002685546875, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009002685546875, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002210920094512403, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0033829217427410185, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.957547050958965e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.957547050958965e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3611508533358574, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.45599839091300964, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3611508533358574, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.45599839091300964, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3611508533358574, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.45599839091300964, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3611508533358574, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.45599839091300964, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3611508533358574, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.45599839091300964, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3611508533358574, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.45599839091300964, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0064645998645573854, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.05064787529408932, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.07190386392176151, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006330984411761165, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006330984411761165, "train_probe_steps_per_second": 0.236 }, { "calibration/aurc": 0.34964791555898866, "calibration/batch_distribution_entropy": 0.9696213125270348, "calibration/buffer_distribution_entropy": 0.8983124294925157, "calibration/confidence_entropy": 0.4958941192671529, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.00234375, "calibration/coverage@15%": 0.00509424115913556, "calibration/coverage@20%": 0.05213040275049117, "calibration/coverage@25%": 0.08225227161100197, "calibration/coverage@30%": 0.37194278518708257, "calibration/coverage@5%": 0.00234375, "calibration/ece": 0.17160420712101454, "calibration/mean_confidence": 0.5104296144058368, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 876.0, "completions/max_terminated_length": 429.8, "completions/mean_length": 167.087109375, "completions/mean_terminated_length": 166.55306396484374, "completions/min_length": 75.0, "completions/min_terminated_length": 75.0, "epoch": 0.176, "grad_norm": 0.0011842965614050627, "learning_rate": 1e-06, "loss": 0.0021, "num_tokens": 184068738.0, "reward": 0.9703314185142518, "reward_std": 0.11359598636627197, "rewards/accuracy_reward": 0.50283203125, "rewards/brier_reward": 0.7624746680259704, "rewards/confidence_uniqueness_reward": 0.8847023010253906, "rewards/format_reward": 0.99912109375, "rewards/frontier_aurc_reward": -0.0029593195766210558, "rewards/frontier_coverage_1": 0.1041076198220253, "rewards/frontier_coverage_10": 0.1041076198220253, "rewards/frontier_coverage_15": 0.1041076198220253, "rewards/frontier_coverage_20": 0.1041076198220253, "rewards/frontier_coverage_25": 0.1041076198220253, "rewards/frontier_coverage_5": 0.1041076198220253, "rewards/frontier_ece_reward": 0.018636261485517024, "signal/accuracy_reward/centered_abs_mean": 0.140679931640625, "signal/accuracy_reward/group_std_mean": 0.18439924120903015, "signal/accuracy_reward/group_zero_std_frac": 0.478125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0703399658203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0703399658203125, "signal/advantage_abs_mean": 0.08731478452682495, "signal/advantage_pre_scale_abs_mean": 0.08731478452682495, "signal/advantage_pre_scale_std": 0.13049161434173584, "signal/advantage_std": 0.13049161434173584, "signal/brier_reward/centered_abs_mean": 0.17366381585597992, "signal/brier_reward/group_std_mean": 0.21920994222164153, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02170797698199749, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02170797698199749, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05222712978720665, "signal/confidence_uniqueness_reward/group_std_mean": 0.06577225774526596, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006528391223400831, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006528391223400831, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002111483830958605, "signal/frontier_aurc_reward/group_std_mean": 0.003272761357948184, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.779556063818745e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.779556063818745e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2239651769399643, "signal/frontier_coverage_1/group_std_mean": 0.28462483286857604, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_10/centered_abs_mean": 0.2239651769399643, "signal/frontier_coverage_10/group_std_mean": 0.28462483286857604, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_15/centered_abs_mean": 0.2239651769399643, "signal/frontier_coverage_15/group_std_mean": 0.28462483286857604, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_20/centered_abs_mean": 0.2239651769399643, "signal/frontier_coverage_20/group_std_mean": 0.28462483286857604, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_25/centered_abs_mean": 0.2239651769399643, "signal/frontier_coverage_25/group_std_mean": 0.28462483286857604, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_5/centered_abs_mean": 0.2239651769399643, "signal/frontier_coverage_5/group_std_mean": 0.28462483286857604, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004008976416662336, "signal/frontier_ece_reward/centered_abs_mean": 0.04017831683158875, "signal/frontier_ece_reward/group_std_mean": 0.052979382872581485, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0050222896039485935, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0050222896039485935, "step": 55 }, { "calibration/aurc": 0.33092391595466186, "calibration/batch_distribution_entropy": 0.9636589659990976, "calibration/buffer_distribution_entropy": 0.9104053730857296, "calibration/confidence_entropy": 0.4565744740313494, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.033203125, "calibration/coverage@15%": 0.07109375, "calibration/coverage@20%": 0.123828125, "calibration/coverage@25%": 0.259375, "calibration/coverage@30%": 0.41015625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.10457382812500002, "calibration/mean_confidence": 0.5450074218750001, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1111.6, "completions/max_terminated_length": 581.6, "completions/mean_length": 171.0466796875, "completions/mean_terminated_length": 170.64740295410155, "completions/min_length": 73.4, "completions/min_terminated_length": 73.4, "epoch": 0.192, "grad_norm": 0.0011184883769601583, "learning_rate": 1e-06, "loss": 0.0012, "num_tokens": 200635072.0, "reward": 0.9835161685943603, "reward_std": 0.11471217423677445, "rewards/accuracy_reward": 0.5234375, "rewards/brier_reward": 0.7757836103439331, "rewards/confidence_uniqueness_reward": 0.8818888425827026, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0028618790674954653, "rewards/frontier_coverage_1": 0.10896083116531372, "rewards/frontier_coverage_10": 0.10896083116531372, "rewards/frontier_coverage_15": 0.10896083116531372, "rewards/frontier_coverage_20": 0.10896083116531372, "rewards/frontier_coverage_25": 0.10896083116531372, "rewards/frontier_coverage_5": 0.10896083116531372, "rewards/frontier_ece_reward": 0.02505997121334076, "signal/accuracy_reward/centered_abs_mean": 0.144921875, "signal/accuracy_reward/group_std_mean": 0.18714555501937866, "signal/accuracy_reward/group_zero_std_frac": 0.478125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0724609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0724609375, "signal/advantage_abs_mean": 0.08908778131008148, "signal/advantage_pre_scale_abs_mean": 0.08908778131008148, "signal/advantage_pre_scale_std": 0.1341209128499031, "signal/advantage_std": 0.1341209128499031, "signal/brier_reward/centered_abs_mean": 0.17730557322502136, "signal/brier_reward/group_std_mean": 0.22363218367099763, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02216319665312767, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02216319665312767, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.059557638317346576, "signal/confidence_uniqueness_reward/group_std_mean": 0.07315693497657776, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007444704789668322, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007444704789668322, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025338933803141115, "signal/frontier_aurc_reward/group_std_mean": 0.003893780894577503, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.535668922471814e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.535668922471814e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21418379843235016, "signal/frontier_coverage_1/group_std_mean": 0.27754629850387574, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_10/centered_abs_mean": 0.21418379843235016, "signal/frontier_coverage_10/group_std_mean": 0.27754629850387574, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_15/centered_abs_mean": 0.21418379843235016, "signal/frontier_coverage_15/group_std_mean": 0.27754629850387574, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_20/centered_abs_mean": 0.21418379843235016, "signal/frontier_coverage_20/group_std_mean": 0.27754629850387574, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_25/centered_abs_mean": 0.21418379843235016, "signal/frontier_coverage_25/group_std_mean": 0.27754629850387574, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_5/centered_abs_mean": 0.21418379843235016, "signal/frontier_coverage_5/group_std_mean": 0.27754629850387574, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0038338899612426758, "signal/frontier_ece_reward/centered_abs_mean": 0.0428839735686779, "signal/frontier_ece_reward/group_std_mean": 0.055157840996980664, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005360496696084738, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005360496696084738, "step": 60 }, { "calibration/aurc": 0.25000286826356066, "calibration/batch_distribution_entropy": 0.9349390762939768, "calibration/buffer_distribution_entropy": 0.9173764134934357, "calibration/confidence_entropy": 0.4289778485805737, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.172265625, "calibration/coverage@15%": 0.25703125, "calibration/coverage@20%": 0.334765625, "calibration/coverage@25%": 0.585546875, "calibration/coverage@30%": 0.728125, "calibration/coverage@5%": 0.0078125, "calibration/ece": 0.12878515624999998, "calibration/mean_confidence": 0.5667140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1127.2, "completions/max_terminated_length": 594.2, "completions/mean_length": 174.773046875, "completions/mean_terminated_length": 173.9766632080078, "completions/min_length": 77.6, "completions/min_terminated_length": 77.6, "epoch": 0.208, "grad_norm": 0.001059314119629562, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 217456972.0, "reward": 1.0045670747756958, "reward_std": 0.11658241301774978, "rewards/accuracy_reward": 0.567578125, "rewards/brier_reward": 0.7832911968231201, "rewards/confidence_uniqueness_reward": 0.878244411945343, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.002497353684157133, "rewards/frontier_coverage_1": 0.0910948745906353, "rewards/frontier_coverage_10": 0.0910948745906353, "rewards/frontier_coverage_15": 0.0910948745906353, "rewards/frontier_coverage_20": 0.0910948745906353, "rewards/frontier_coverage_25": 0.0910948745906353, "rewards/frontier_coverage_5": 0.0910948745906353, "rewards/frontier_ece_reward": 0.029902569949626923, "signal/accuracy_reward/centered_abs_mean": 0.14205322265625, "signal/accuracy_reward/group_std_mean": 0.18839455246925355, "signal/accuracy_reward/group_zero_std_frac": 0.459375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.071026611328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.071026611328125, "signal/advantage_abs_mean": 0.08862931281328201, "signal/advantage_pre_scale_abs_mean": 0.08862931281328201, "signal/advantage_pre_scale_std": 0.13590119183063507, "signal/advantage_std": 0.13590119183063507, "signal/brier_reward/centered_abs_mean": 0.18094989657402039, "signal/brier_reward/group_std_mean": 0.227994641661644, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022618737071752548, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022618737071752548, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06556902974843978, "signal/confidence_uniqueness_reward/group_std_mean": 0.07995099425315857, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008196128718554973, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008196128718554973, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417306780815, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002661912888288498, "signal/frontier_aurc_reward/group_std_mean": 0.004251162149012088, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.76482389785815e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.76482389785815e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21462590992450714, "signal/frontier_coverage_1/group_std_mean": 0.2767761766910553, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_10/centered_abs_mean": 0.21462590992450714, "signal/frontier_coverage_10/group_std_mean": 0.2767761766910553, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_15/centered_abs_mean": 0.21462590992450714, "signal/frontier_coverage_15/group_std_mean": 0.2767761766910553, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_20/centered_abs_mean": 0.21462590992450714, "signal/frontier_coverage_20/group_std_mean": 0.2767761766910553, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_25/centered_abs_mean": 0.21462590992450714, "signal/frontier_coverage_25/group_std_mean": 0.2767761766910553, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_5/centered_abs_mean": 0.21462590992450714, "signal/frontier_coverage_5/group_std_mean": 0.2767761766910553, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003841803641989827, "signal/frontier_ece_reward/centered_abs_mean": 0.04156382754445076, "signal/frontier_ece_reward/group_std_mean": 0.05281273275613785, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005195478443056345, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005195478443056345, "step": 65 }, { "calibration/aurc": 0.30945615354255396, "calibration/batch_distribution_entropy": 0.9465401465342558, "calibration/buffer_distribution_entropy": 0.9241643371521917, "calibration/confidence_entropy": 0.40588373989046644, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.12734375, "calibration/coverage@15%": 0.280078125, "calibration/coverage@20%": 0.401953125, "calibration/coverage@25%": 0.47265625, "calibration/coverage@30%": 0.56015625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16765182064885026, "calibration/mean_confidence": 0.5055874925850048, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1536.0, "completions/max_terminated_length": 802.2, "completions/mean_length": 175.83212890625, "completions/mean_terminated_length": 175.16766052246095, "completions/min_length": 74.2, "completions/min_terminated_length": 74.2, "epoch": 0.224, "grad_norm": 0.0009633832960389555, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 234410677.0, "reward": 0.990851080417633, "reward_std": 0.11021712720394135, "rewards/accuracy_reward": 0.5328125, "rewards/brier_reward": 0.783275818824768, "rewards/confidence_uniqueness_reward": 0.8812341094017029, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.0026209069881588222, "rewards/frontier_coverage_1": 0.12510305941104888, "rewards/frontier_coverage_10": 0.12510305941104888, "rewards/frontier_coverage_15": 0.12510305941104888, "rewards/frontier_coverage_20": 0.12510305941104888, "rewards/frontier_coverage_25": 0.12510305941104888, "rewards/frontier_coverage_5": 0.12510305941104888, "rewards/frontier_ece_reward": 0.026669794321060182, "signal/accuracy_reward/centered_abs_mean": 0.12713623046875, "signal/accuracy_reward/group_std_mean": 0.1714620292186737, "signal/accuracy_reward/group_zero_std_frac": 0.490625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.063568115234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.063568115234375, "signal/advantage_abs_mean": 0.08353961855173112, "signal/advantage_pre_scale_abs_mean": 0.08353961855173112, "signal/advantage_pre_scale_std": 0.13004283159971236, "signal/advantage_std": 0.13004283159971236, "signal/brier_reward/centered_abs_mean": 0.17861478626728058, "signal/brier_reward/group_std_mean": 0.22785739600658417, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022326848283410073, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022326848283410073, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06680140942335129, "signal/confidence_uniqueness_reward/group_std_mean": 0.08212911635637284, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008350176177918911, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008350176177918911, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028360622934997083, "signal/frontier_aurc_reward/group_std_mean": 0.004303571488708258, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.076551242382266e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.076551242382266e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21149853765964508, "signal/frontier_coverage_1/group_std_mean": 0.27557849884033203, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_10/centered_abs_mean": 0.21149853765964508, "signal/frontier_coverage_10/group_std_mean": 0.27557849884033203, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_15/centered_abs_mean": 0.21149853765964508, "signal/frontier_coverage_15/group_std_mean": 0.27557849884033203, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_20/centered_abs_mean": 0.21149853765964508, "signal/frontier_coverage_20/group_std_mean": 0.27557849884033203, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_25/centered_abs_mean": 0.21149853765964508, "signal/frontier_coverage_25/group_std_mean": 0.27557849884033203, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_5/centered_abs_mean": 0.21149853765964508, "signal/frontier_coverage_5/group_std_mean": 0.27557849884033203, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037858237978070975, "signal/frontier_ece_reward/centered_abs_mean": 0.037810226529836656, "signal/frontier_ece_reward/group_std_mean": 0.04775542095303535, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004726278316229582, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004726278316229582, "step": 70 }, { "calibration/aurc": 0.261170135309063, "calibration/batch_distribution_entropy": 0.898169931982497, "calibration/buffer_distribution_entropy": 0.9293096435116759, "calibration/confidence_entropy": 0.3891703973877817, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.1546875, "calibration/coverage@15%": 0.29375, "calibration/coverage@20%": 0.36953125, "calibration/coverage@25%": 0.490234375, "calibration/coverage@30%": 0.64921875, "calibration/coverage@5%": 0.0, "calibration/ece": 0.14350970052213544, "calibration/mean_confidence": 0.5793494293798254, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1348.4, "completions/max_terminated_length": 551.0, "completions/mean_length": 177.95, "completions/mean_terminated_length": 177.28620300292968, "completions/min_length": 77.8, "completions/min_terminated_length": 77.8, "epoch": 0.24, "grad_norm": 0.0010709511116147041, "learning_rate": 1e-06, "loss": 0.0017, "num_tokens": 251484565.0, "reward": 1.0148473739624024, "reward_std": 0.11671035438776016, "rewards/accuracy_reward": 0.59541015625, "rewards/brier_reward": 0.7709134936332702, "rewards/confidence_uniqueness_reward": 0.8791958093643188, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.0024798931321129204, "rewards/frontier_coverage_1": 0.0713951449841261, "rewards/frontier_coverage_10": 0.0713951449841261, "rewards/frontier_coverage_15": 0.0713951449841261, "rewards/frontier_coverage_20": 0.0713951449841261, "rewards/frontier_coverage_25": 0.0713951449841261, "rewards/frontier_coverage_5": 0.0713951449841261, "rewards/frontier_ece_reward": 0.028385131061077117, "signal/accuracy_reward/centered_abs_mean": 0.145281982421875, "signal/accuracy_reward/group_std_mean": 0.19074405431747438, "signal/accuracy_reward/group_zero_std_frac": 0.453125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0726409912109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0726409912109375, "signal/advantage_abs_mean": 0.09049365520477295, "signal/advantage_pre_scale_abs_mean": 0.09049365520477295, "signal/advantage_pre_scale_std": 0.13883660733699799, "signal/advantage_std": 0.13883660733699799, "signal/brier_reward/centered_abs_mean": 0.19185077846050264, "signal/brier_reward/group_std_mean": 0.24133287370204926, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02398134730756283, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02398134730756283, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07062419950962066, "signal/confidence_uniqueness_reward/group_std_mean": 0.08817773014307022, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008828024938702583, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008828024938702583, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032755933701992036, "signal/frontier_aurc_reward/group_std_mean": 0.005048427078872919, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.863312107976526e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.863312107976526e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21241567730903627, "signal/frontier_coverage_1/group_std_mean": 0.2803816318511963, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_10/centered_abs_mean": 0.21241567730903627, "signal/frontier_coverage_10/group_std_mean": 0.2803816318511963, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_15/centered_abs_mean": 0.21241567730903627, "signal/frontier_coverage_15/group_std_mean": 0.2803816318511963, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_20/centered_abs_mean": 0.21241567730903627, "signal/frontier_coverage_20/group_std_mean": 0.2803816318511963, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_25/centered_abs_mean": 0.21241567730903627, "signal/frontier_coverage_25/group_std_mean": 0.2803816318511963, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_5/centered_abs_mean": 0.21241567730903627, "signal/frontier_coverage_5/group_std_mean": 0.2803816318511963, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003802240453660488, "signal/frontier_ece_reward/centered_abs_mean": 0.03876788690686226, "signal/frontier_ece_reward/group_std_mean": 0.04816498681902885, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048459858633577825, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048459858633577825, "step": 75 }, { "calibration/aurc": 0.2467021288852847, "calibration/batch_distribution_entropy": 0.8769867203545791, "calibration/buffer_distribution_entropy": 0.9305634576699605, "calibration/confidence_entropy": 0.35485692049776835, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.141796875, "calibration/coverage@15%": 0.353515625, "calibration/coverage@20%": 0.529296875, "calibration/coverage@25%": 0.612109375, "calibration/coverage@30%": 0.683984375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1159896194053629, "calibration/mean_confidence": 0.5697380717473134, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 1156.4, "completions/max_terminated_length": 519.2, "completions/mean_length": 172.75732421875, "completions/mean_terminated_length": 171.95858154296874, "completions/min_length": 80.0, "completions/min_terminated_length": 80.0, "epoch": 0.256, "grad_norm": 0.0012047714553773403, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 268308416.0, "reward": 1.0095821619033813, "reward_std": 0.10738990157842636, "rewards/accuracy_reward": 0.56796875, "rewards/brier_reward": 0.7930272340774536, "rewards/confidence_uniqueness_reward": 0.8817017793655395, "rewards/format_reward": 0.99873046875, "rewards/frontier_aurc_reward": -0.0023847362026572227, "rewards/frontier_coverage_1": 0.12308522313833237, "rewards/frontier_coverage_10": 0.12308522313833237, "rewards/frontier_coverage_15": 0.12308522313833237, "rewards/frontier_coverage_20": 0.12308522313833237, "rewards/frontier_coverage_25": 0.12308522313833237, "rewards/frontier_coverage_5": 0.12308522313833237, "rewards/frontier_ece_reward": 0.029718055576086044, "signal/accuracy_reward/centered_abs_mean": 0.1286376953125, "signal/accuracy_reward/group_std_mean": 0.16931941509246826, "signal/accuracy_reward/group_zero_std_frac": 0.515625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06431884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06431884765625, "signal/advantage_abs_mean": 0.08091795295476914, "signal/advantage_pre_scale_abs_mean": 0.08091795295476914, "signal/advantage_pre_scale_std": 0.13172105848789215, "signal/advantage_std": 0.13172105848789215, "signal/brier_reward/centered_abs_mean": 0.17564865946769714, "signal/brier_reward/group_std_mean": 0.22503041923046113, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021956082433462143, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021956082433462143, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06999158263206481, "signal/confidence_uniqueness_reward/group_std_mean": 0.08747024983167648, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008748947829008102, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008748947829008102, "signal/format_reward/centered_abs_mean": 0.002459716796875, "signal/format_reward/group_std_mean": 0.00718155293725431, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032616748940199614, "signal/frontier_aurc_reward/group_std_mean": 0.0050237664021551606, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.8383979194331914e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.8383979194331914e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2024263024330139, "signal/frontier_coverage_1/group_std_mean": 0.26835508942604064, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_10/centered_abs_mean": 0.2024263024330139, "signal/frontier_coverage_10/group_std_mean": 0.26835508942604064, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_15/centered_abs_mean": 0.2024263024330139, "signal/frontier_coverage_15/group_std_mean": 0.26835508942604064, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_20/centered_abs_mean": 0.2024263024330139, "signal/frontier_coverage_20/group_std_mean": 0.26835508942604064, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_25/centered_abs_mean": 0.2024263024330139, "signal/frontier_coverage_25/group_std_mean": 0.26835508942604064, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_5/centered_abs_mean": 0.2024263024330139, "signal/frontier_coverage_5/group_std_mean": 0.26835508942604064, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003623430663719773, "signal/frontier_ece_reward/centered_abs_mean": 0.032590297609567644, "signal/frontier_ece_reward/group_std_mean": 0.0412607304751873, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0040737872011959554, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0040737872011959554, "step": 80 }, { "calibration/aurc": 0.2642183850892495, "calibration/batch_distribution_entropy": 0.9123858748662885, "calibration/buffer_distribution_entropy": 0.9324726343473598, "calibration/confidence_entropy": 0.3785149112394486, "calibration/coverage@0%": 0.012890625, "calibration/coverage@1%": 0.012890625, "calibration/coverage@10%": 0.116015625, "calibration/coverage@15%": 0.3109375, "calibration/coverage@20%": 0.408984375, "calibration/coverage@25%": 0.5415048617906066, "calibration/coverage@30%": 0.6079348091976516, "calibration/coverage@5%": 0.012890625, "calibration/ece": 0.132531155446651, "calibration/mean_confidence": 0.5557964833742884, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1267.4, "completions/max_terminated_length": 663.4, "completions/mean_length": 179.5677734375, "completions/mean_terminated_length": 179.03648986816407, "completions/min_length": 84.6, "completions/min_terminated_length": 84.6, "epoch": 0.272, "grad_norm": 0.0008821667870506644, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 285112886.0, "reward": 1.0122202634811401, "reward_std": 0.103294475376606, "rewards/accuracy_reward": 0.5728515625, "rewards/brier_reward": 0.7857265949249268, "rewards/confidence_uniqueness_reward": 0.9025682806968689, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.002512221224606037, "rewards/frontier_coverage_1": 0.10919135846197606, "rewards/frontier_coverage_10": 0.10919135846197606, "rewards/frontier_coverage_15": 0.10919135846197606, "rewards/frontier_coverage_20": 0.10919135846197606, "rewards/frontier_coverage_25": 0.10919135846197606, "rewards/frontier_coverage_5": 0.10919135846197606, "rewards/frontier_ece_reward": 0.02616579309105873, "signal/accuracy_reward/centered_abs_mean": 0.12288818359375, "signal/accuracy_reward/group_std_mean": 0.15963667035102844, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.061444091796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.061444091796875, "signal/advantage_abs_mean": 0.08039057850837708, "signal/advantage_pre_scale_abs_mean": 0.08039057850837708, "signal/advantage_pre_scale_std": 0.12777341157197952, "signal/advantage_std": 0.12777341157197952, "signal/brier_reward/centered_abs_mean": 0.17774806618690492, "signal/brier_reward/group_std_mean": 0.22448875308036803, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022218508273363115, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022218508273363115, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.055553752928972244, "signal/confidence_uniqueness_reward/group_std_mean": 0.06775816455483437, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0069442191161215305, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0069442191161215305, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0031457245349884033, "signal/frontier_aurc_reward/group_std_mean": 0.004772697854787111, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.630846717394888e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.630846717394888e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20491620600223542, "signal/frontier_coverage_1/group_std_mean": 0.2665486991405487, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_10/centered_abs_mean": 0.20491620600223542, "signal/frontier_coverage_10/group_std_mean": 0.2665486991405487, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_15/centered_abs_mean": 0.20491620600223542, "signal/frontier_coverage_15/group_std_mean": 0.2665486991405487, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_20/centered_abs_mean": 0.20491620600223542, "signal/frontier_coverage_20/group_std_mean": 0.2665486991405487, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_25/centered_abs_mean": 0.20491620600223542, "signal/frontier_coverage_25/group_std_mean": 0.2665486991405487, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_5/centered_abs_mean": 0.20491620600223542, "signal/frontier_coverage_5/group_std_mean": 0.2665486991405487, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003667999850586057, "signal/frontier_ece_reward/centered_abs_mean": 0.030647655576467515, "signal/frontier_ece_reward/group_std_mean": 0.03821772038936615, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0038309569470584394, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0038309569470584394, "step": 85 }, { "calibration/aurc": 0.32164312709967285, "calibration/batch_distribution_entropy": 0.889030555552942, "calibration/buffer_distribution_entropy": 0.9340647660701571, "calibration/confidence_entropy": 0.3723696647266723, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.064453125, "calibration/coverage@15%": 0.10546875, "calibration/coverage@20%": 0.18671875, "calibration/coverage@25%": 0.279296875, "calibration/coverage@30%": 0.40625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.14947785675533237, "calibration/mean_confidence": 0.5987036054648168, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 419.2, "completions/max_terminated_length": 419.2, "completions/mean_length": 173.05849609375, "completions/mean_terminated_length": 173.05849609375, "completions/min_length": 81.6, "completions/min_terminated_length": 81.6, "epoch": 0.288, "grad_norm": 0.0011444967240095139, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 301843181.0, "reward": 1.0008944511413573, "reward_std": 0.10447315275669097, "rewards/accuracy_reward": 0.551953125, "rewards/brier_reward": 0.7695533514022828, "rewards/confidence_uniqueness_reward": 0.9145703554153443, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0030466041062027216, "rewards/frontier_coverage_1": 0.10940263271331788, "rewards/frontier_coverage_10": 0.10940263271331788, "rewards/frontier_coverage_15": 0.10940263271331788, "rewards/frontier_coverage_20": 0.10940263271331788, "rewards/frontier_coverage_25": 0.10940263271331788, "rewards/frontier_coverage_5": 0.10940263271331788, "rewards/frontier_ece_reward": 0.022437838092446327, "signal/accuracy_reward/centered_abs_mean": 0.12567138671875, "signal/accuracy_reward/group_std_mean": 0.16594321131706238, "signal/accuracy_reward/group_zero_std_frac": 0.525, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.062835693359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.062835693359375, "signal/advantage_abs_mean": 0.07972771823406219, "signal/advantage_pre_scale_abs_mean": 0.07972771823406219, "signal/advantage_pre_scale_std": 0.12657924741506577, "signal/advantage_std": 0.12657924741506577, "signal/brier_reward/centered_abs_mean": 0.1829427719116211, "signal/brier_reward/group_std_mean": 0.23203744888305664, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022867846488952636, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.022867846488952636, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.050545477867126466, "signal/confidence_uniqueness_reward/group_std_mean": 0.06067455634474754, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006318184733390808, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006318184733390808, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0034512685146182776, "signal/frontier_aurc_reward/group_std_mean": 0.00543216560035944, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.177770264912397e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.177770264912397e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20699847042560576, "signal/frontier_coverage_1/group_std_mean": 0.2705923795700073, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_10/centered_abs_mean": 0.20699847042560576, "signal/frontier_coverage_10/group_std_mean": 0.2705923795700073, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_15/centered_abs_mean": 0.20699847042560576, "signal/frontier_coverage_15/group_std_mean": 0.2705923795700073, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_20/centered_abs_mean": 0.20699847042560576, "signal/frontier_coverage_20/group_std_mean": 0.2705923795700073, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_25/centered_abs_mean": 0.20699847042560576, "signal/frontier_coverage_25/group_std_mean": 0.2705923795700073, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_5/centered_abs_mean": 0.20699847042560576, "signal/frontier_coverage_5/group_std_mean": 0.2705923795700073, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037052724044770004, "signal/frontier_ece_reward/centered_abs_mean": 0.029942670091986655, "signal/frontier_ece_reward/group_std_mean": 0.037430693954229356, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003742833761498332, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003742833761498332, "step": 90 }, { "calibration/aurc": 0.21649904704687412, "calibration/batch_distribution_entropy": 0.8802667061478353, "calibration/buffer_distribution_entropy": 0.9338496829298666, "calibration/confidence_entropy": 0.3712671385467933, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.026953125, "calibration/coverage@15%": 0.2890625, "calibration/coverage@20%": 0.645703125, "calibration/coverage@25%": 0.769140625, "calibration/coverage@30%": 0.862109375, "calibration/coverage@5%": 0.0, "calibration/ece": 0.11950063296604121, "calibration/mean_confidence": 0.6085048357839588, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 773.0, "completions/max_terminated_length": 563.2, "completions/mean_length": 173.11982421875, "completions/mean_terminated_length": 172.98677978515624, "completions/min_length": 78.2, "completions/min_terminated_length": 78.2, "epoch": 0.304, "grad_norm": 0.0012024985626339912, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 318545880.0, "reward": 1.0252812027931213, "reward_std": 0.10164082497358322, "rewards/accuracy_reward": 0.5994140625, "rewards/brier_reward": 0.7789253473281861, "rewards/confidence_uniqueness_reward": 0.9277184486389161, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0023648647125810384, "rewards/frontier_coverage_1": 0.0874197174794972, "rewards/frontier_coverage_10": 0.0874197174794972, "rewards/frontier_coverage_15": 0.0874197174794972, "rewards/frontier_coverage_20": 0.0874197174794972, "rewards/frontier_coverage_25": 0.0874197174794972, "rewards/frontier_coverage_5": 0.0874197174794972, "rewards/frontier_ece_reward": 0.02356785014271736, "signal/accuracy_reward/centered_abs_mean": 0.12322998046875, "signal/accuracy_reward/group_std_mean": 0.1658935070037842, "signal/accuracy_reward/group_zero_std_frac": 0.515625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.061614990234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.061614990234375, "signal/advantage_abs_mean": 0.07720276862382888, "signal/advantage_pre_scale_abs_mean": 0.07720276862382888, "signal/advantage_pre_scale_std": 0.12322989106178284, "signal/advantage_std": 0.12322989106178284, "signal/brier_reward/centered_abs_mean": 0.17541297674179077, "signal/brier_reward/group_std_mean": 0.22342281341552733, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021926622092723846, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021926622092723846, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0434452086687088, "signal/confidence_uniqueness_reward/group_std_mean": 0.05217555984854698, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0054306510835886, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0054306510835886, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002743481518700719, "signal/frontier_aurc_reward/group_std_mean": 0.004372889362275601, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.910831557936035e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.910831557936035e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2050688624382019, "signal/frontier_coverage_1/group_std_mean": 0.27132690250873565, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_10/centered_abs_mean": 0.2050688624382019, "signal/frontier_coverage_10/group_std_mean": 0.27132690250873565, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_15/centered_abs_mean": 0.2050688624382019, "signal/frontier_coverage_15/group_std_mean": 0.27132690250873565, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_20/centered_abs_mean": 0.2050688624382019, "signal/frontier_coverage_20/group_std_mean": 0.27132690250873565, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_25/centered_abs_mean": 0.2050688624382019, "signal/frontier_coverage_25/group_std_mean": 0.27132690250873565, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_5/centered_abs_mean": 0.2050688624382019, "signal/frontier_coverage_5/group_std_mean": 0.27132690250873565, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036707324907183647, "signal/frontier_ece_reward/centered_abs_mean": 0.028253377601504325, "signal/frontier_ece_reward/group_std_mean": 0.0347771979868412, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0035316722001880406, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0035316722001880406, "step": 95 }, { "calibration/aurc": 0.2098873289679891, "calibration/batch_distribution_entropy": 0.8840538637302338, "calibration/buffer_distribution_entropy": 0.9334977967028385, "calibration/confidence_entropy": 0.3802012123632416, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.153515625, "calibration/coverage@15%": 0.375, "calibration/coverage@20%": 0.535546875, "calibration/coverage@25%": 0.64609375, "calibration/coverage@30%": 0.844921875, "calibration/coverage@5%": 0.00859375, "calibration/ece": 0.1021149372829088, "calibration/mean_confidence": 0.6126254173994252, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 759.6, "completions/max_terminated_length": 534.4, "completions/mean_length": 171.9232421875, "completions/mean_terminated_length": 171.7903839111328, "completions/min_length": 77.6, "completions/min_terminated_length": 77.6, "epoch": 0.32, "grad_norm": 0.000909713504370302, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 335395078.0, "reward": 1.0206497073173524, "reward_std": 0.0881238341331482, "rewards/accuracy_reward": 0.577734375, "rewards/brier_reward": 0.7934822678565979, "rewards/confidence_uniqueness_reward": 0.9379700899124146, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.00240150757599622, "rewards/frontier_coverage_1": 0.11633307486772537, "rewards/frontier_coverage_10": 0.11633307486772537, "rewards/frontier_coverage_15": 0.11633307486772537, "rewards/frontier_coverage_20": 0.11633307486772537, "rewards/frontier_coverage_25": 0.11633307486772537, "rewards/frontier_coverage_5": 0.11633307486772537, "rewards/frontier_ece_reward": 0.023979850485920905, "signal/accuracy_reward/centered_abs_mean": 0.09036865234375, "signal/accuracy_reward/group_std_mean": 0.12911975234746934, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045184326171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045184326171875, "signal/advantage_abs_mean": 0.06487039029598236, "signal/advantage_pre_scale_abs_mean": 0.06487039029598236, "signal/advantage_pre_scale_std": 0.11023673117160797, "signal/advantage_std": 0.11023673117160797, "signal/brier_reward/centered_abs_mean": 0.16053855717182158, "signal/brier_reward/group_std_mean": 0.20720755457878112, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020067319646477698, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020067319646477698, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.037102106213569644, "signal/confidence_uniqueness_reward/group_std_mean": 0.04544886723160744, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0046377632766962055, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0046377632766962055, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025523790158331395, "signal/frontier_aurc_reward/group_std_mean": 0.004083980154246092, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.568758231471293e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.568758231471293e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18334722518920898, "signal/frontier_coverage_1/group_std_mean": 0.2422294318675995, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_10/centered_abs_mean": 0.18334722518920898, "signal/frontier_coverage_10/group_std_mean": 0.2422294318675995, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_15/centered_abs_mean": 0.18334722518920898, "signal/frontier_coverage_15/group_std_mean": 0.2422294318675995, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_20/centered_abs_mean": 0.18334722518920898, "signal/frontier_coverage_20/group_std_mean": 0.2422294318675995, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_25/centered_abs_mean": 0.18334722518920898, "signal/frontier_coverage_25/group_std_mean": 0.2422294318675995, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_5/centered_abs_mean": 0.18334722518920898, "signal/frontier_coverage_5/group_std_mean": 0.2422294318675995, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003281915234401822, "signal/frontier_ece_reward/centered_abs_mean": 0.025719008594751357, "signal/frontier_ece_reward/group_std_mean": 0.03179643303155899, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032148760743439197, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032148760743439197, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.5391036516427652, "eval_calibration/batch_distribution_entropy": 0.896161072633235, "eval_calibration/buffer_distribution_entropy": 0.9332897449793491, "eval_calibration/confidence_entropy": 0.3928585129324643, "eval_calibration/coverage@0%": 0.0234375, "eval_calibration/coverage@1%": 0.0234375, "eval_calibration/coverage@10%": 0.0234375, "eval_calibration/coverage@15%": 0.0234375, "eval_calibration/coverage@20%": 0.0234375, "eval_calibration/coverage@25%": 0.0546875, "eval_calibration/coverage@30%": 0.1328125, "eval_calibration/coverage@5%": 0.0234375, "eval_calibration/ece": 0.31548437500000004, "eval_calibration/mean_confidence": 0.5214375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 318.0, "eval_completions/max_terminated_length": 318.0, "eval_completions/mean_length": 174.37715530395508, "eval_completions/mean_terminated_length": 174.37715530395508, "eval_completions/min_length": 94.0, "eval_completions/min_terminated_length": 94.0, "eval_loss": 0.0, "eval_num_tokens": 335395078.0, "eval_reward": 0.9348780065774918, "eval_reward_std": 0.2365853264927864, "eval_rewards/accuracy_reward": 0.400390625, "eval_rewards/brier_reward": 0.7740589827299118, "eval_rewards/confidence_uniqueness_reward": 0.888916015625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0038473134045489132, "eval_rewards/frontier_coverage_1": 0.23241731896996498, "eval_rewards/frontier_coverage_10": 0.23241731896996498, "eval_rewards/frontier_coverage_15": 0.23241731896996498, "eval_rewards/frontier_coverage_20": 0.23241731896996498, "eval_rewards/frontier_coverage_25": 0.23241731896996498, "eval_rewards/frontier_coverage_5": 0.23241731896996498, "eval_rewards/frontier_ece_reward": 0.015344643266871572, "eval_runtime": 18.5787, "eval_samples_per_second": 26.913, "eval_signal/accuracy_reward/centered_abs_mean": 0.4688720703125, "eval_signal/accuracy_reward/group_std_mean": 0.49159620702266693, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23443603515625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23443603515625, "eval_signal/advantage_abs_mean": 0.21387740224599838, "eval_signal/advantage_pre_scale_abs_mean": 0.21387740224599838, "eval_signal/advantage_pre_scale_std": 0.23405225947499275, "eval_signal/advantage_std": 0.23405225947499275, "eval_signal/brier_reward/centered_abs_mean": 0.25311582535505295, "eval_signal/brier_reward/group_std_mean": 0.31092750281095505, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03163947816938162, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.03163947816938162, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.051788330078125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06362179387360811, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006473541259765625, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006473541259765625, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004530601087026298, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0076346289133653045, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.109775626508053e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.109775626508053e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.37415503710508347, "eval_signal/frontier_coverage_1/group_std_mean": 0.45467519015073776, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.37415503710508347, "eval_signal/frontier_coverage_10/group_std_mean": 0.45467519015073776, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.37415503710508347, "eval_signal/frontier_coverage_15/group_std_mean": 0.45467519015073776, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.37415503710508347, "eval_signal/frontier_coverage_20/group_std_mean": 0.45467519015073776, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.37415503710508347, "eval_signal/frontier_coverage_25/group_std_mean": 0.45467519015073776, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.37415503710508347, "eval_signal/frontier_coverage_5/group_std_mean": 0.45467519015073776, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006697374978102744, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.035073693841695786, "eval_signal/frontier_ece_reward/group_std_mean": 0.046107963658869267, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004384211730211973, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004384211730211973, "eval_steps_per_second": 0.215, "step": 100 }, { "epoch": 0.32, "step": 100, "train_probe_calibration/aurc": 0.2376980377133199, "train_probe_calibration/batch_distribution_entropy": 0.8460000670916705, "train_probe_calibration/buffer_distribution_entropy": 0.9334354664119227, "train_probe_calibration/confidence_entropy": 0.3949835291522682, "train_probe_calibration/coverage@0%": 0.140625, "train_probe_calibration/coverage@1%": 0.140625, "train_probe_calibration/coverage@10%": 0.1953125, "train_probe_calibration/coverage@15%": 0.28125, "train_probe_calibration/coverage@20%": 0.5078125, "train_probe_calibration/coverage@25%": 0.609375, "train_probe_calibration/coverage@30%": 0.703125, "train_probe_calibration/coverage@5%": 0.140625, "train_probe_calibration/ece": 0.19056250000000002, "train_probe_calibration/mean_confidence": 0.5500625, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 351.25, "train_probe_completions/max_terminated_length": 351.25, "train_probe_completions/mean_length": 172.39978408813477, "train_probe_completions/mean_terminated_length": 172.39978408813477, "train_probe_completions/min_length": 90.25, "train_probe_completions/min_terminated_length": 90.25, "train_probe_loss": 0.0, "train_probe_num_tokens": 335395078.0, "train_probe_reward": 1.0273381769657135, "train_probe_reward_std": 0.2266932986676693, "train_probe_rewards/accuracy_reward": 0.60546875, "train_probe_rewards/brier_reward": 0.7994071692228317, "train_probe_rewards/confidence_uniqueness_reward": 0.88720703125, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0018720118096098304, "train_probe_rewards/frontier_coverage_1": 0.09955346956849098, "train_probe_rewards/frontier_coverage_10": 0.09955346956849098, "train_probe_rewards/frontier_coverage_15": 0.09955346956849098, "train_probe_rewards/frontier_coverage_20": 0.09955346956849098, "train_probe_rewards/frontier_coverage_25": 0.09955346956849098, "train_probe_rewards/frontier_coverage_5": 0.09955346956849098, "train_probe_rewards/frontier_ece_reward": 0.024947880767285824, "train_probe_runtime": 19.5457, "train_probe_samples_per_second": 25.581, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.468505859375, "train_probe_signal/accuracy_reward/group_std_mean": 0.49154773354530334, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2342529296875, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2342529296875, "train_probe_signal/advantage_abs_mean": 0.20661941543221474, "train_probe_signal/advantage_pre_scale_abs_mean": 0.20661941543221474, "train_probe_signal/advantage_pre_scale_std": 0.2241741679608822, "train_probe_signal/advantage_std": 0.2241741679608822, "train_probe_signal/brier_reward/centered_abs_mean": 0.231151282787323, "train_probe_signal/brier_reward/group_std_mean": 0.2955388203263283, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.028893910348415375, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.028893910348415375, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0560455322265625, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0693097673356533, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0070056915283203125, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0070056915283203125, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0028542151558212936, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.005022436263971031, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.109044832352083e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.109044832352083e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3502242639660835, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.4751490503549576, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.3502242639660835, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.4751490503549576, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.3502242639660835, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.4751490503549576, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.3502242639660835, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.4751490503549576, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3502242639660835, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.4751490503549576, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3502242639660835, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.4751490503549576, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006269014091230929, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.034564562141895294, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.043938882648944855, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004320570267736912, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004320570267736912, "train_probe_steps_per_second": 0.205 }, { "calibration/aurc": 0.26481606899320165, "calibration/batch_distribution_entropy": 0.911049416340618, "calibration/buffer_distribution_entropy": 0.9370308540267022, "calibration/confidence_entropy": 0.3856464288010409, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.094140625, "calibration/coverage@15%": 0.33984375, "calibration/coverage@20%": 0.397265625, "calibration/coverage@25%": 0.46796875, "calibration/coverage@30%": 0.61328125, "calibration/coverage@5%": 0.05859375, "calibration/ece": 0.1609574228715549, "calibration/mean_confidence": 0.5372468384372941, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 650.6, "completions/max_terminated_length": 446.6, "completions/mean_length": 171.29970703125, "completions/mean_terminated_length": 171.1662384033203, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.336, "grad_norm": 0.0009579297038726509, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 351871619.0, "reward": 1.026773464679718, "reward_std": 0.0938282698392868, "rewards/accuracy_reward": 0.58876953125, "rewards/brier_reward": 0.7977856278419495, "rewards/confidence_uniqueness_reward": 0.9435371160507202, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0020208923146128655, "rewards/frontier_coverage_1": 0.1127518393099308, "rewards/frontier_coverage_10": 0.1127518393099308, "rewards/frontier_coverage_15": 0.1127518393099308, "rewards/frontier_coverage_20": 0.1127518393099308, "rewards/frontier_coverage_25": 0.1127518393099308, "rewards/frontier_coverage_5": 0.1127518393099308, "rewards/frontier_ece_reward": 0.021590472385287284, "signal/accuracy_reward/centered_abs_mean": 0.114825439453125, "signal/accuracy_reward/group_std_mean": 0.1522398740053177, "signal/accuracy_reward/group_zero_std_frac": 0.565625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574127197265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0574127197265625, "signal/advantage_abs_mean": 0.07198196649551392, "signal/advantage_pre_scale_abs_mean": 0.07198196649551392, "signal/advantage_pre_scale_std": 0.11692783534526825, "signal/advantage_std": 0.11692783534526825, "signal/brier_reward/centered_abs_mean": 0.16734021306037902, "signal/brier_reward/group_std_mean": 0.21267004311084747, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020917526632547378, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020917526632547378, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03207938522100449, "signal/confidence_uniqueness_reward/group_std_mean": 0.04001001343131065, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004009923152625561, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004009923152625561, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021244912641122937, "signal/frontier_aurc_reward/group_std_mean": 0.0033777955919504165, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.802839128184132e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.802839128184132e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20386078357696533, "signal/frontier_coverage_1/group_std_mean": 0.2637325257062912, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_10/centered_abs_mean": 0.20386078357696533, "signal/frontier_coverage_10/group_std_mean": 0.2637325257062912, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_15/centered_abs_mean": 0.20386078357696533, "signal/frontier_coverage_15/group_std_mean": 0.2637325257062912, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_20/centered_abs_mean": 0.20386078357696533, "signal/frontier_coverage_20/group_std_mean": 0.2637325257062912, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_25/centered_abs_mean": 0.20386078357696533, "signal/frontier_coverage_25/group_std_mean": 0.2637325257062912, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_5/centered_abs_mean": 0.20386078357696533, "signal/frontier_coverage_5/group_std_mean": 0.2637325257062912, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036491078790277243, "signal/frontier_ece_reward/centered_abs_mean": 0.022904927283525466, "signal/frontier_ece_reward/group_std_mean": 0.028568074852228165, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002863115910440683, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002863115910440683, "step": 105 }, { "calibration/aurc": 0.2918670757064623, "calibration/batch_distribution_entropy": 0.8617419290524296, "calibration/buffer_distribution_entropy": 0.9459875684518371, "calibration/confidence_entropy": 0.3511704141965763, "calibration/coverage@0%": 0.0125, "calibration/coverage@1%": 0.0125, "calibration/coverage@10%": 0.226953125, "calibration/coverage@15%": 0.337890625, "calibration/coverage@20%": 0.399609375, "calibration/coverage@25%": 0.4484375, "calibration/coverage@30%": 0.512109375, "calibration/coverage@5%": 0.0890625, "calibration/ece": 0.12058771609022725, "calibration/mean_confidence": 0.45003809801340733, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 635.8, "completions/max_terminated_length": 446.2, "completions/mean_length": 170.15615234375, "completions/mean_terminated_length": 170.0229278564453, "completions/min_length": 75.2, "completions/min_terminated_length": 75.2, "epoch": 0.352, "grad_norm": 0.0009440815192647278, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 368874434.0, "reward": 0.9951952695846558, "reward_std": 0.08828350454568863, "rewards/accuracy_reward": 0.52373046875, "rewards/brier_reward": 0.784073281288147, "rewards/confidence_uniqueness_reward": 0.9385409832000733, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0024972121231257916, "rewards/frontier_coverage_1": 0.15070441961288453, "rewards/frontier_coverage_10": 0.15070441961288453, "rewards/frontier_coverage_15": 0.15070441961288453, "rewards/frontier_coverage_20": 0.15070441961288453, "rewards/frontier_coverage_25": 0.15070441961288453, "rewards/frontier_coverage_5": 0.15070441961288453, "rewards/frontier_ece_reward": 0.015288973599672318, "signal/accuracy_reward/centered_abs_mean": 0.105633544921875, "signal/accuracy_reward/group_std_mean": 0.14147602766752243, "signal/accuracy_reward/group_zero_std_frac": 0.584375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0528167724609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0528167724609375, "signal/advantage_abs_mean": 0.06714115589857102, "signal/advantage_pre_scale_abs_mean": 0.06714115589857102, "signal/advantage_pre_scale_std": 0.1110717460513115, "signal/advantage_std": 0.1110717460513115, "signal/brier_reward/centered_abs_mean": 0.1665810763835907, "signal/brier_reward/group_std_mean": 0.21240653693675995, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02082263454794884, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02082263454794884, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03552674874663353, "signal/confidence_uniqueness_reward/group_std_mean": 0.0450509749352932, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004440843593329191, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004440843593329191, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002249357360415161, "signal/frontier_aurc_reward/group_std_mean": 0.003604770079255104, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.026349415653385e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.026349415653385e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2058982342481613, "signal/frontier_coverage_1/group_std_mean": 0.2676191568374634, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_10/centered_abs_mean": 0.2058982342481613, "signal/frontier_coverage_10/group_std_mean": 0.2676191568374634, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_15/centered_abs_mean": 0.2058982342481613, "signal/frontier_coverage_15/group_std_mean": 0.2676191568374634, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_20/centered_abs_mean": 0.2058982342481613, "signal/frontier_coverage_20/group_std_mean": 0.2676191568374634, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_25/centered_abs_mean": 0.2058982342481613, "signal/frontier_coverage_25/group_std_mean": 0.2676191568374634, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_5/centered_abs_mean": 0.2058982342481613, "signal/frontier_coverage_5/group_std_mean": 0.2676191568374634, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003685578191652894, "signal/frontier_ece_reward/centered_abs_mean": 0.018955815210938455, "signal/frontier_ece_reward/group_std_mean": 0.023499416932463647, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002369476901367307, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002369476901367307, "step": 110 }, { "calibration/aurc": 0.26006198370363565, "calibration/batch_distribution_entropy": 0.8909601189119198, "calibration/buffer_distribution_entropy": 0.9543928456982405, "calibration/confidence_entropy": 0.3737341235551185, "calibration/coverage@0%": 0.01603626467710372, "calibration/coverage@1%": 0.01603626467710372, "calibration/coverage@10%": 0.20185680650684928, "calibration/coverage@15%": 0.28745031188845405, "calibration/coverage@20%": 0.4328125, "calibration/coverage@25%": 0.584765625, "calibration/coverage@30%": 0.668359375, "calibration/coverage@5%": 0.05321826076320939, "calibration/ece": 0.12706476116156798, "calibration/mean_confidence": 0.5286066296783638, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 687.6, "completions/max_terminated_length": 478.2, "completions/mean_length": 167.1548828125, "completions/mean_terminated_length": 167.02129821777345, "completions/min_length": 76.8, "completions/min_terminated_length": 76.8, "epoch": 0.368, "grad_norm": 0.0009902457240968943, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 385651572.0, "reward": 1.0256622314453125, "reward_std": 0.08201654255390167, "rewards/accuracy_reward": 0.57900390625, "rewards/brier_reward": 0.8139191031455993, "rewards/confidence_uniqueness_reward": 0.9448665976524353, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0020156882936134936, "rewards/frontier_coverage_1": 0.13771790713071824, "rewards/frontier_coverage_10": 0.13771790713071824, "rewards/frontier_coverage_15": 0.13771790713071824, "rewards/frontier_coverage_20": 0.13771790713071824, "rewards/frontier_coverage_25": 0.12578624486923218, "rewards/frontier_coverage_5": 0.13771790713071824, "rewards/frontier_ece_reward": 0.015338649787008763, "signal/accuracy_reward/centered_abs_mean": 0.090032958984375, "signal/accuracy_reward/group_std_mean": 0.12576899230480193, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0450164794921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0450164794921875, "signal/advantage_abs_mean": 0.06141816526651382, "signal/advantage_pre_scale_abs_mean": 0.06141816526651382, "signal/advantage_pre_scale_std": 0.10529383420944213, "signal/advantage_std": 0.10529383420944213, "signal/brier_reward/centered_abs_mean": 0.15247377157211303, "signal/brier_reward/group_std_mean": 0.19677919149398804, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01905922144651413, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01905922144651413, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030233245342969894, "signal/confidence_uniqueness_reward/group_std_mean": 0.039144163578748704, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003779155667871237, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003779155667871237, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021210510516539217, "signal/frontier_aurc_reward/group_std_mean": 0.0034853797405958176, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.796681339736096e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.796681339736096e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18898755609989165, "signal/frontier_coverage_1/group_std_mean": 0.24688771665096282, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_10/centered_abs_mean": 0.18898755609989165, "signal/frontier_coverage_10/group_std_mean": 0.24688771665096282, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_15/centered_abs_mean": 0.18898755609989165, "signal/frontier_coverage_15/group_std_mean": 0.24688771665096282, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_20/centered_abs_mean": 0.18898755609989165, "signal/frontier_coverage_20/group_std_mean": 0.24688771665096282, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_25/centered_abs_mean": 0.16050014942884444, "signal/frontier_coverage_25/group_std_mean": 0.21017540693283082, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002872952586039901, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002872952586039901, "signal/frontier_coverage_5/centered_abs_mean": 0.18898755609989165, "signal/frontier_coverage_5/group_std_mean": 0.24688771665096282, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033828773070126773, "signal/frontier_ece_reward/centered_abs_mean": 0.015096320770680904, "signal/frontier_ece_reward/group_std_mean": 0.01860468164086342, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001887040096335113, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001887040096335113, "step": 115 }, { "calibration/aurc": 0.24020855256031118, "calibration/batch_distribution_entropy": 0.8819246681337859, "calibration/buffer_distribution_entropy": 0.9595033013104548, "calibration/confidence_entropy": 0.36571512554246827, "calibration/coverage@0%": 0.01640625, "calibration/coverage@1%": 0.01640625, "calibration/coverage@10%": 0.266015625, "calibration/coverage@15%": 0.35078125, "calibration/coverage@20%": 0.422265625, "calibration/coverage@25%": 0.48203125, "calibration/coverage@30%": 0.680859375, "calibration/coverage@5%": 0.18125, "calibration/ece": 0.13183292817767742, "calibration/mean_confidence": 0.4883596199957732, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 977.6, "completions/max_terminated_length": 643.6, "completions/mean_length": 162.63994140625, "completions/mean_terminated_length": 162.3724334716797, "completions/min_length": 72.4, "completions/min_terminated_length": 72.4, "epoch": 0.384, "grad_norm": 0.0009414847008883953, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 402173517.0, "reward": 1.028240156173706, "reward_std": 0.08490664958953857, "rewards/accuracy_reward": 0.5876953125, "rewards/brier_reward": 0.8151444077491761, "rewards/confidence_uniqueness_reward": 0.9464893937110901, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002180484868586063, "rewards/frontier_coverage_1": 0.12647273913025855, "rewards/frontier_coverage_10": 0.12647273913025855, "rewards/frontier_coverage_15": 0.12647273913025855, "rewards/frontier_coverage_20": 0.12413697615265847, "rewards/frontier_coverage_25": 0.08527236208319663, "rewards/frontier_coverage_5": 0.12647273913025855, "rewards/frontier_ece_reward": 0.012559224478900432, "signal/accuracy_reward/centered_abs_mean": 0.10203857421875, "signal/accuracy_reward/group_std_mean": 0.139366614818573, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.051019287109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051019287109375, "signal/advantage_abs_mean": 0.06343221440911292, "signal/advantage_pre_scale_abs_mean": 0.06343221440911292, "signal/advantage_pre_scale_std": 0.11096447557210923, "signal/advantage_std": 0.11096447557210923, "signal/brier_reward/centered_abs_mean": 0.14464540481567384, "signal/brier_reward/group_std_mean": 0.18787792026996614, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01808067560195923, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01808067560195923, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02793549485504627, "signal/confidence_uniqueness_reward/group_std_mean": 0.0359924353659153, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003491936856880784, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003491936856880784, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002523245778866112, "signal/frontier_aurc_reward/group_std_mean": 0.0043451421894133094, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5166096970206124e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5166096970206124e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17531461119651795, "signal/frontier_coverage_1/group_std_mean": 0.2309555232524872, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031381313689053058, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031381313689053058, "signal/frontier_coverage_10/centered_abs_mean": 0.17531461119651795, "signal/frontier_coverage_10/group_std_mean": 0.2309555232524872, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031381313689053058, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031381313689053058, "signal/frontier_coverage_15/centered_abs_mean": 0.17531461119651795, "signal/frontier_coverage_15/group_std_mean": 0.2309555232524872, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031381313689053058, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031381313689053058, "signal/frontier_coverage_20/centered_abs_mean": 0.17357857525348663, "signal/frontier_coverage_20/group_std_mean": 0.22865375578403474, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003107056301087141, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003107056301087141, "signal/frontier_coverage_25/centered_abs_mean": 0.10947014093399048, "signal/frontier_coverage_25/group_std_mean": 0.14513879716396333, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019595154793933035, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019595154793933035, "signal/frontier_coverage_5/centered_abs_mean": 0.17531461119651795, "signal/frontier_coverage_5/group_std_mean": 0.2309555232524872, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031381313689053058, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031381313689053058, "signal/frontier_ece_reward/centered_abs_mean": 0.01200645174831152, "signal/frontier_ece_reward/group_std_mean": 0.01493366789072752, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00150080646853894, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00150080646853894, "step": 120 }, { "calibration/aurc": 0.2844994718888988, "calibration/batch_distribution_entropy": 0.9237969701468984, "calibration/buffer_distribution_entropy": 0.9619282325349795, "calibration/confidence_entropy": 0.4142039763975651, "calibration/coverage@0%": 0.011721043297455968, "calibration/coverage@1%": 0.011721043297455968, "calibration/coverage@10%": 0.03597572162426614, "calibration/coverage@15%": 0.10791034735812133, "calibration/coverage@20%": 0.3302172517123288, "calibration/coverage@25%": 0.4877041034735813, "calibration/coverage@30%": 0.5815114359099804, "calibration/coverage@5%": 0.032460096624266147, "calibration/ece": 0.1442927572230528, "calibration/mean_confidence": 0.5743742787719365, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 617.0, "completions/max_terminated_length": 428.4, "completions/mean_length": 160.984765625, "completions/mean_terminated_length": 160.8504638671875, "completions/min_length": 76.0, "completions/min_terminated_length": 76.0, "epoch": 0.4, "grad_norm": 0.001233375514857471, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 418858449.0, "reward": 1.0142970323562621, "reward_std": 0.09367316514253617, "rewards/accuracy_reward": 0.56884765625, "rewards/brier_reward": 0.7930649518966675, "rewards/confidence_uniqueness_reward": 0.9494104027748108, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002801778074353933, "rewards/frontier_coverage_1": 0.111895702034235, "rewards/frontier_coverage_10": 0.111895702034235, "rewards/frontier_coverage_15": 0.111895702034235, "rewards/frontier_coverage_20": 0.09835360199213028, "rewards/frontier_coverage_25": 0.07011410780251026, "rewards/frontier_coverage_5": 0.111895702034235, "rewards/frontier_ece_reward": 0.009083650819957257, "signal/accuracy_reward/centered_abs_mean": 0.118499755859375, "signal/accuracy_reward/group_std_mean": 0.16050875782966614, "signal/accuracy_reward/group_zero_std_frac": 0.525, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0592498779296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0592498779296875, "signal/advantage_abs_mean": 0.0702929750084877, "signal/advantage_pre_scale_abs_mean": 0.0702929750084877, "signal/advantage_pre_scale_std": 0.12010517567396164, "signal/advantage_std": 0.12010517567396164, "signal/brier_reward/centered_abs_mean": 0.15167818665504457, "signal/brier_reward/group_std_mean": 0.19488880634307862, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01895977333188057, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01895977333188057, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026428508386015893, "signal/confidence_uniqueness_reward/group_std_mean": 0.03375169932842255, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0033035635482519866, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033035635482519866, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002832173928618431, "signal/frontier_aurc_reward/group_std_mean": 0.004560053441673517, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.069591134088114e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.069591134088114e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1735062450170517, "signal/frontier_coverage_1/group_std_mean": 0.22942977547645568, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003105761716142297, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003105761716142297, "signal/frontier_coverage_10/centered_abs_mean": 0.1735062450170517, "signal/frontier_coverage_10/group_std_mean": 0.22942977547645568, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003105761716142297, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003105761716142297, "signal/frontier_coverage_15/centered_abs_mean": 0.1735062450170517, "signal/frontier_coverage_15/group_std_mean": 0.22942977547645568, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003105761716142297, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003105761716142297, "signal/frontier_coverage_20/centered_abs_mean": 0.14573751091957093, "signal/frontier_coverage_20/group_std_mean": 0.19352927505970002, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026087014470249415, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026087014470249415, "signal/frontier_coverage_25/centered_abs_mean": 0.08992226272821427, "signal/frontier_coverage_25/group_std_mean": 0.11867372989654541, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016096084378659724, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016096084378659724, "signal/frontier_coverage_5/centered_abs_mean": 0.1735062450170517, "signal/frontier_coverage_5/group_std_mean": 0.22942977547645568, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003105761716142297, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003105761716142297, "signal/frontier_ece_reward/centered_abs_mean": 0.010532907396554946, "signal/frontier_ece_reward/group_std_mean": 0.013023488782346248, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0013166134245693683, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0013166134245693683, "step": 125 }, { "calibration/aurc": 0.27468807423750957, "calibration/batch_distribution_entropy": 0.9489113413581535, "calibration/buffer_distribution_entropy": 0.9616886407581575, "calibration/confidence_entropy": 0.44173120087722995, "calibration/coverage@0%": 0.008597572162426615, "calibration/coverage@1%": 0.008597572162426615, "calibration/coverage@10%": 0.1097694471624266, "calibration/coverage@15%": 0.18712007705479453, "calibration/coverage@20%": 0.27618334148727985, "calibration/coverage@25%": 0.4500871453033268, "calibration/coverage@30%": 0.6063937133072408, "calibration/coverage@5%": 0.032816322162426616, "calibration/ece": 0.10054416984275531, "calibration/mean_confidence": 0.5599183650598022, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 699.2, "completions/max_terminated_length": 499.0, "completions/mean_length": 161.9859375, "completions/mean_terminated_length": 161.85264892578124, "completions/min_length": 76.2, "completions/min_terminated_length": 76.2, "epoch": 0.416, "grad_norm": 0.0010082739172503352, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 435398369.0, "reward": 1.0111759543418883, "reward_std": 0.0877716675400734, "rewards/accuracy_reward": 0.5583984375, "rewards/brier_reward": 0.7999887347221375, "rewards/confidence_uniqueness_reward": 0.9553175568580627, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002307292679324746, "rewards/frontier_coverage_1": 0.11955121904611588, "rewards/frontier_coverage_10": 0.11955121904611588, "rewards/frontier_coverage_15": 0.11955121904611588, "rewards/frontier_coverage_20": 0.10541392564773559, "rewards/frontier_coverage_25": 0.0678506538271904, "rewards/frontier_coverage_5": 0.11955121904611588, "rewards/frontier_ece_reward": 0.00793801536783576, "signal/accuracy_reward/centered_abs_mean": 0.1123291015625, "signal/accuracy_reward/group_std_mean": 0.1460072174668312, "signal/accuracy_reward/group_zero_std_frac": 0.59375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05616455078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05616455078125, "signal/advantage_abs_mean": 0.06770573481917382, "signal/advantage_pre_scale_abs_mean": 0.06770573481917382, "signal/advantage_pre_scale_std": 0.11403226554393768, "signal/advantage_std": 0.11403226554393768, "signal/brier_reward/centered_abs_mean": 0.15031678080558777, "signal/brier_reward/group_std_mean": 0.19109582304954528, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018789597600698472, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018789597600698472, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021022913232445716, "signal/confidence_uniqueness_reward/group_std_mean": 0.027267256006598472, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026278641540557145, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026278641540557145, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021706197410821916, "signal/frontier_aurc_reward/group_std_mean": 0.003645438142120838, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.885409387294203e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.885409387294203e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18729844689369202, "signal/frontier_coverage_1/group_std_mean": 0.24028244614601135, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003352642059326172, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003352642059326172, "signal/frontier_coverage_10/centered_abs_mean": 0.18729844689369202, "signal/frontier_coverage_10/group_std_mean": 0.24028244614601135, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003352642059326172, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003352642059326172, "signal/frontier_coverage_15/centered_abs_mean": 0.18729844689369202, "signal/frontier_coverage_15/group_std_mean": 0.24028244614601135, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003352642059326172, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003352642059326172, "signal/frontier_coverage_20/centered_abs_mean": 0.1524550050497055, "signal/frontier_coverage_20/group_std_mean": 0.19646928310394288, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027289445977658035, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027289445977658035, "signal/frontier_coverage_25/centered_abs_mean": 0.08746959716081619, "signal/frontier_coverage_25/group_std_mean": 0.11204418540000916, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001565705775283277, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001565705775283277, "signal/frontier_coverage_5/centered_abs_mean": 0.18729844689369202, "signal/frontier_coverage_5/group_std_mean": 0.24028244614601135, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003352642059326172, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003352642059326172, "signal/frontier_ece_reward/centered_abs_mean": 0.00957362912595272, "signal/frontier_ece_reward/group_std_mean": 0.01186007559299469, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00119670364074409, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00119670364074409, "step": 130 }, { "calibration/aurc": 0.2170116955736013, "calibration/batch_distribution_entropy": 0.920250529181654, "calibration/buffer_distribution_entropy": 0.9593461850745699, "calibration/confidence_entropy": 0.41075450223345805, "calibration/coverage@0%": 0.03126758194716243, "calibration/coverage@1%": 0.03126758194716243, "calibration/coverage@10%": 0.30654583537181995, "calibration/coverage@15%": 0.4034712879158513, "calibration/coverage@20%": 0.49061888454011743, "calibration/coverage@25%": 0.6015625, "calibration/coverage@30%": 0.695703125, "calibration/coverage@5%": 0.22365536325831706, "calibration/ece": 0.12825262002892726, "calibration/mean_confidence": 0.5729583550437628, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 861.4, "completions/max_terminated_length": 403.8, "completions/mean_length": 161.29306640625, "completions/mean_terminated_length": 161.02431030273436, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.432, "grad_norm": 0.0009220021311193705, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 452064346.0, "reward": 1.0282735109329224, "reward_std": 0.07874491959810256, "rewards/accuracy_reward": 0.59111328125, "rewards/brier_reward": 0.8130090713500977, "rewards/confidence_uniqueness_reward": 0.952438759803772, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0019511275691911577, "rewards/frontier_coverage_1": 0.1167385719716549, "rewards/frontier_coverage_10": 0.1167385719716549, "rewards/frontier_coverage_15": 0.1167385719716549, "rewards/frontier_coverage_20": 0.09187164157629013, "rewards/frontier_coverage_25": 0.06807960644364357, "rewards/frontier_coverage_5": 0.1167385719716549, "rewards/frontier_ece_reward": 0.00796552887186408, "signal/accuracy_reward/centered_abs_mean": 0.100054931640625, "signal/accuracy_reward/group_std_mean": 0.12813358157873153, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0500274658203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0500274658203125, "signal/advantage_abs_mean": 0.060691606253385544, "signal/advantage_pre_scale_abs_mean": 0.060691606253385544, "signal/advantage_pre_scale_std": 0.10658708661794662, "signal/advantage_std": 0.10658708661794662, "signal/brier_reward/centered_abs_mean": 0.1367792531847954, "signal/brier_reward/group_std_mean": 0.17578783333301545, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017097406648099424, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017097406648099424, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023115601390600204, "signal/confidence_uniqueness_reward/group_std_mean": 0.03054891638457775, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028894501738250256, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028894501738250256, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018829480512067675, "signal/frontier_aurc_reward/group_std_mean": 0.0031504146289080383, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3704767702147365e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3704767702147365e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17200512290000916, "signal/frontier_coverage_1/group_std_mean": 0.22458739280700685, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030788916628807783, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030788916628807783, "signal/frontier_coverage_10/centered_abs_mean": 0.17200512290000916, "signal/frontier_coverage_10/group_std_mean": 0.22458739280700685, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030788916628807783, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030788916628807783, "signal/frontier_coverage_15/centered_abs_mean": 0.17200512290000916, "signal/frontier_coverage_15/group_std_mean": 0.22458739280700685, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030788916628807783, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030788916628807783, "signal/frontier_coverage_20/centered_abs_mean": 0.12090182155370713, "signal/frontier_coverage_20/group_std_mean": 0.15927328169345856, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002164142485707998, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002164142485707998, "signal/frontier_coverage_25/centered_abs_mean": 0.07504236698150635, "signal/frontier_coverage_25/group_std_mean": 0.09683282524347306, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013432582607492804, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013432582607492804, "signal/frontier_coverage_5/centered_abs_mean": 0.17200512290000916, "signal/frontier_coverage_5/group_std_mean": 0.22458739280700685, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030788916628807783, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030788916628807783, "signal/frontier_ece_reward/centered_abs_mean": 0.008515550382435321, "signal/frontier_ece_reward/group_std_mean": 0.01071312427520752, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010644437978044152, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010644437978044152, "step": 135 }, { "calibration/aurc": 0.2344617037580517, "calibration/batch_distribution_entropy": 0.9158115214558646, "calibration/buffer_distribution_entropy": 0.9532071558973751, "calibration/confidence_entropy": 0.43419576863505027, "calibration/coverage@0%": 0.011328125, "calibration/coverage@1%": 0.011328125, "calibration/coverage@10%": 0.094921875, "calibration/coverage@15%": 0.19296875, "calibration/coverage@20%": 0.3484375, "calibration/coverage@25%": 0.60390625, "calibration/coverage@30%": 0.79140625, "calibration/coverage@5%": 0.0140625, "calibration/ece": 0.11601793193579395, "calibration/mean_confidence": 0.6193143780017666, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 448.6, "completions/max_terminated_length": 448.6, "completions/mean_length": 164.63623046875, "completions/mean_terminated_length": 164.63623046875, "completions/min_length": 81.6, "completions/min_terminated_length": 81.6, "epoch": 0.448, "grad_norm": 0.0026026626583188772, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 468703021.0, "reward": 1.0116289138793946, "reward_std": 0.08069856613874435, "rewards/accuracy_reward": 0.55439453125, "rewards/brier_reward": 0.8098321080207824, "rewards/confidence_uniqueness_reward": 0.9554630517959595, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0024544465355575084, "rewards/frontier_coverage_1": 0.1361882671713829, "rewards/frontier_coverage_10": 0.1361882671713829, "rewards/frontier_coverage_15": 0.1361882671713829, "rewards/frontier_coverage_20": 0.10684386640787125, "rewards/frontier_coverage_25": 0.07347736358642579, "rewards/frontier_coverage_5": 0.1361882671713829, "rewards/frontier_ece_reward": 0.00746011808514595, "signal/accuracy_reward/centered_abs_mean": 0.095538330078125, "signal/accuracy_reward/group_std_mean": 0.1277057021856308, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477691650390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0477691650390625, "signal/advantage_abs_mean": 0.06105227619409561, "signal/advantage_pre_scale_abs_mean": 0.06105227619409561, "signal/advantage_pre_scale_std": 0.10724145770072938, "signal/advantage_std": 0.10724145770072938, "signal/brier_reward/centered_abs_mean": 0.13868603557348252, "signal/brier_reward/group_std_mean": 0.178034707903862, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017335754446685316, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017335754446685316, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0210552129894495, "signal/confidence_uniqueness_reward/group_std_mean": 0.027469881996512412, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0026319016236811877, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026319016236811877, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002177398931235075, "signal/frontier_aurc_reward/group_std_mean": 0.003708243044093251, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.897543938364833e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.897543938364833e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17191272974014282, "signal/frontier_coverage_1/group_std_mean": 0.22308169305324554, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030772378202527763, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030772378202527763, "signal/frontier_coverage_10/centered_abs_mean": 0.17191272974014282, "signal/frontier_coverage_10/group_std_mean": 0.22308169305324554, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030772378202527763, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030772378202527763, "signal/frontier_coverage_15/centered_abs_mean": 0.17191272974014282, "signal/frontier_coverage_15/group_std_mean": 0.22308169305324554, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030772378202527763, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030772378202527763, "signal/frontier_coverage_20/centered_abs_mean": 0.11961922645568848, "signal/frontier_coverage_20/group_std_mean": 0.1564598023891449, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021411839872598646, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021411839872598646, "signal/frontier_coverage_25/centered_abs_mean": 0.07283841371536255, "signal/frontier_coverage_25/group_std_mean": 0.09265869408845902, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013038075994700193, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013038075994700193, "signal/frontier_coverage_5/centered_abs_mean": 0.17191272974014282, "signal/frontier_coverage_5/group_std_mean": 0.22308169305324554, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030772378202527763, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030772378202527763, "signal/frontier_ece_reward/centered_abs_mean": 0.008390486426651477, "signal/frontier_ece_reward/group_std_mean": 0.010463342070579529, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010488108033314346, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010488108033314346, "step": 140 }, { "calibration/aurc": 0.3035030156088047, "calibration/batch_distribution_entropy": 0.9494077028090173, "calibration/buffer_distribution_entropy": 0.9476631169760029, "calibration/confidence_entropy": 0.44092445270135744, "calibration/coverage@0%": 0.006259937622309197, "calibration/coverage@1%": 0.006259937622309197, "calibration/coverage@10%": 0.026572437622309198, "calibration/coverage@15%": 0.1161295254403131, "calibration/coverage@20%": 0.3525486179060665, "calibration/coverage@25%": 0.4424389982876712, "calibration/coverage@30%": 0.5639516572896281, "calibration/coverage@5%": 0.006259937622309197, "calibration/ece": 0.12671306244307387, "calibration/mean_confidence": 0.5430596285708069, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1117.8, "completions/max_terminated_length": 479.6, "completions/mean_length": 163.9982421875, "completions/mean_terminated_length": 163.59661254882812, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.464, "grad_norm": 0.0009285790147259831, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 485553179.0, "reward": 0.9942840814590455, "reward_std": 0.07983254492282868, "rewards/accuracy_reward": 0.5244140625, "rewards/brier_reward": 0.7915266156196594, "rewards/confidence_uniqueness_reward": 0.9534668684005737, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0026315408293157818, "rewards/frontier_coverage_1": 0.14171417951583862, "rewards/frontier_coverage_10": 0.14171417951583862, "rewards/frontier_coverage_15": 0.14171417951583862, "rewards/frontier_coverage_20": 0.10749387815594673, "rewards/frontier_coverage_25": 0.06779449284076691, "rewards/frontier_coverage_5": 0.14171417951583862, "rewards/frontier_ece_reward": 0.006896446458995342, "signal/accuracy_reward/centered_abs_mean": 0.0907958984375, "signal/accuracy_reward/group_std_mean": 0.1230411022901535, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04539794921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04539794921875, "signal/advantage_abs_mean": 0.05986447930335999, "signal/advantage_pre_scale_abs_mean": 0.05986447930335999, "signal/advantage_pre_scale_std": 0.10530668199062347, "signal/advantage_std": 0.10530668199062347, "signal/brier_reward/centered_abs_mean": 0.14057967960834503, "signal/brier_reward/group_std_mean": 0.18116458952426912, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01757245995104313, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01757245995104313, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022300581261515618, "signal/confidence_uniqueness_reward/group_std_mean": 0.029358403012156486, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0027875726576894523, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027875726576894523, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002173250797204673, "signal/frontier_aurc_reward/group_std_mean": 0.0035278352443128824, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.890118823619559e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.890118823619559e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16866520643234253, "signal/frontier_coverage_1/group_std_mean": 0.2227775514125824, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030191069934517147, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030191069934517147, "signal/frontier_coverage_10/centered_abs_mean": 0.16866520643234253, "signal/frontier_coverage_10/group_std_mean": 0.2227775514125824, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030191069934517147, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030191069934517147, "signal/frontier_coverage_15/centered_abs_mean": 0.16866520643234253, "signal/frontier_coverage_15/group_std_mean": 0.2227775514125824, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030191069934517147, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030191069934517147, "signal/frontier_coverage_20/centered_abs_mean": 0.11727796494960785, "signal/frontier_coverage_20/group_std_mean": 0.15594127774238586, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020992755657061935, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020992755657061935, "signal/frontier_coverage_25/centered_abs_mean": 0.07200475186109542, "signal/frontier_coverage_25/group_std_mean": 0.09224026650190353, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012888850411400198, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012888850411400198, "signal/frontier_coverage_5/centered_abs_mean": 0.16866520643234253, "signal/frontier_coverage_5/group_std_mean": 0.2227775514125824, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030191069934517147, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030191069934517147, "signal/frontier_ece_reward/centered_abs_mean": 0.00801121462136507, "signal/frontier_ece_reward/group_std_mean": 0.010085698775947094, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010014018276706337, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010014018276706337, "step": 145 }, { "calibration/aurc": 0.2490675060818095, "calibration/batch_distribution_entropy": 0.9173210190726669, "calibration/buffer_distribution_entropy": 0.9421114116879643, "calibration/confidence_entropy": 0.4066536328827809, "calibration/coverage@0%": 0.00859375, "calibration/coverage@1%": 0.00859375, "calibration/coverage@10%": 0.02890625, "calibration/coverage@15%": 0.2320450097847358, "calibration/coverage@20%": 0.3984833659491194, "calibration/coverage@25%": 0.5518025318003914, "calibration/coverage@30%": 0.7471654843444228, "calibration/coverage@5%": 0.00859375, "calibration/ece": 0.12770797905692954, "calibration/mean_confidence": 0.5467555666867103, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 863.6, "completions/max_terminated_length": 481.4, "completions/mean_length": 159.551953125, "completions/mean_terminated_length": 159.2826934814453, "completions/min_length": 80.8, "completions/min_terminated_length": 80.8, "epoch": 0.48, "grad_norm": 0.0010119550861418247, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 502235023.0, "reward": 1.018705701828003, "reward_std": 0.08774305582046509, "rewards/accuracy_reward": 0.5771484375, "rewards/brier_reward": 0.7982451438903808, "rewards/confidence_uniqueness_reward": 0.9457098007202148, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002013521012850106, "rewards/frontier_coverage_1": 0.11858767569065094, "rewards/frontier_coverage_10": 0.11858767569065094, "rewards/frontier_coverage_15": 0.11858767569065094, "rewards/frontier_coverage_20": 0.0924240618944168, "rewards/frontier_coverage_25": 0.06828025579452515, "rewards/frontier_coverage_5": 0.11858767569065094, "rewards/frontier_ece_reward": 0.007226689532399177, "signal/accuracy_reward/centered_abs_mean": 0.1236328125, "signal/accuracy_reward/group_std_mean": 0.1596635937690735, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06181640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06181640625, "signal/advantage_abs_mean": 0.066917584836483, "signal/advantage_pre_scale_abs_mean": 0.066917584836483, "signal/advantage_pre_scale_std": 0.1149211123585701, "signal/advantage_std": 0.1149211123585701, "signal/brier_reward/centered_abs_mean": 0.1455012708902359, "signal/brier_reward/group_std_mean": 0.18579219579696654, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018187658861279488, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018187658861279488, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02829902097582817, "signal/confidence_uniqueness_reward/group_std_mean": 0.036865927278995514, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003537377621978521, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003537377621978521, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018063589930534362, "signal/frontier_aurc_reward/group_std_mean": 0.002892591571435332, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.233382631151471e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.233382631151471e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19047014713287352, "signal/frontier_coverage_1/group_std_mean": 0.24632398784160614, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00340941553004086, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00340941553004086, "signal/frontier_coverage_10/centered_abs_mean": 0.19047014713287352, "signal/frontier_coverage_10/group_std_mean": 0.24632398784160614, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00340941553004086, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00340941553004086, "signal/frontier_coverage_15/centered_abs_mean": 0.19047014713287352, "signal/frontier_coverage_15/group_std_mean": 0.24632398784160614, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00340941553004086, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00340941553004086, "signal/frontier_coverage_20/centered_abs_mean": 0.1285821259021759, "signal/frontier_coverage_20/group_std_mean": 0.16771571040153505, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023016199003905056, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023016199003905056, "signal/frontier_coverage_25/centered_abs_mean": 0.07739093005657197, "signal/frontier_coverage_25/group_std_mean": 0.09800889045000076, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013852976029738785, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013852976029738785, "signal/frontier_coverage_5/centered_abs_mean": 0.19047014713287352, "signal/frontier_coverage_5/group_std_mean": 0.24632398784160614, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00340941553004086, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00340941553004086, "signal/frontier_ece_reward/centered_abs_mean": 0.008578121662139893, "signal/frontier_ece_reward/group_std_mean": 0.010665779560804367, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010722652077674866, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010722652077674866, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.4865714451779266, "eval_calibration/batch_distribution_entropy": 0.8811929958426155, "eval_calibration/buffer_distribution_entropy": 0.9384680529539475, "eval_calibration/confidence_entropy": 0.3955379708976029, "eval_calibration/coverage@0%": 0.0234375, "eval_calibration/coverage@1%": 0.0234375, "eval_calibration/coverage@10%": 0.0234375, "eval_calibration/coverage@15%": 0.0234375, "eval_calibration/coverage@20%": 0.09375, "eval_calibration/coverage@25%": 0.109375, "eval_calibration/coverage@30%": 0.109375, "eval_calibration/coverage@5%": 0.0234375, "eval_calibration/ece": 0.2584375, "eval_calibration/mean_confidence": 0.5084374999999999, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 287.25, "eval_completions/max_terminated_length": 287.25, "eval_completions/mean_length": 160.2589569091797, "eval_completions/mean_terminated_length": 160.2589569091797, "eval_completions/min_length": 91.5, "eval_completions/min_terminated_length": 91.5, "eval_loss": 0.0, "eval_num_tokens": 502235023.0, "eval_reward": 0.936515599489212, "eval_reward_std": 0.2348359413444996, "eval_rewards/accuracy_reward": 0.4140625, "eval_rewards/brier_reward": 0.7844546884298325, "eval_rewards/confidence_uniqueness_reward": 0.889892578125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0032348172389902174, "eval_rewards/frontier_coverage_1": 0.2147538997232914, "eval_rewards/frontier_coverage_10": 0.2147538997232914, "eval_rewards/frontier_coverage_15": 0.2147538997232914, "eval_rewards/frontier_coverage_20": 0.140639740973711, "eval_rewards/frontier_coverage_25": 0.07195262983441353, "eval_rewards/frontier_coverage_5": 0.2147538997232914, "eval_rewards/frontier_ece_reward": 0.008536459412425756, "eval_runtime": 17.4381, "eval_samples_per_second": 28.673, "eval_signal/accuracy_reward/centered_abs_mean": 0.469482421875, "eval_signal/accuracy_reward/group_std_mean": 0.4919169917702675, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2347412109375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2347412109375, "eval_signal/advantage_abs_mean": 0.2155938372015953, "eval_signal/advantage_pre_scale_abs_mean": 0.2155938372015953, "eval_signal/advantage_pre_scale_std": 0.23217838630080223, "eval_signal/advantage_std": 0.23217838630080223, "eval_signal/brier_reward/centered_abs_mean": 0.23349540308117867, "eval_signal/brier_reward/group_std_mean": 0.28603896498680115, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029186925385147333, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.029186925385147333, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0508575439453125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.061956305988132954, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0063571929931640625, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0063571929931640625, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.0038491138839162886, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007427805452607572, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.889913493068889e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.889913493068889e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3643998056650162, "eval_signal/frontier_coverage_1/group_std_mean": 0.44875599443912506, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006522756069898605, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006522756069898605, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3643998056650162, "eval_signal/frontier_coverage_10/group_std_mean": 0.44875599443912506, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006522756069898605, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006522756069898605, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3643998056650162, "eval_signal/frontier_coverage_15/group_std_mean": 0.44875599443912506, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006522756069898605, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006522756069898605, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2309923656284809, "eval_signal/frontier_coverage_20/group_std_mean": 0.28767409920692444, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004134763090405613, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004134763090405613, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.11495377495884895, "eval_signal/frontier_coverage_25/group_std_mean": 0.14584658294916153, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020576725364662707, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020576725364662707, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3643998056650162, "eval_signal/frontier_coverage_5/group_std_mean": 0.44875599443912506, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006522756069898605, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006522756069898605, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.012437232304364443, "eval_signal/frontier_ece_reward/group_std_mean": 0.014940991066396236, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0015546540380455554, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0015546540380455554, "eval_steps_per_second": 0.229, "step": 150 }, { "epoch": 0.48, "step": 150, "train_probe_calibration/aurc": 0.19899992017321977, "train_probe_calibration/batch_distribution_entropy": 0.891369256496254, "train_probe_calibration/buffer_distribution_entropy": 0.9381455575303994, "train_probe_calibration/confidence_entropy": 0.44171598086053, "train_probe_calibration/coverage@0%": 0.140625, "train_probe_calibration/coverage@1%": 0.140625, "train_probe_calibration/coverage@10%": 0.3203125, "train_probe_calibration/coverage@15%": 0.46875, "train_probe_calibration/coverage@20%": 0.671875, "train_probe_calibration/coverage@25%": 0.7734375, "train_probe_calibration/coverage@30%": 0.8359375, "train_probe_calibration/coverage@5%": 0.140625, "train_probe_calibration/ece": 0.162015625, "train_probe_calibration/mean_confidence": 0.5688437500000001, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 285.5, "train_probe_completions/max_terminated_length": 285.5, "train_probe_completions/mean_length": 157.96134185791016, "train_probe_completions/mean_terminated_length": 157.96134185791016, "train_probe_completions/min_length": 84.75, "train_probe_completions/min_terminated_length": 84.75, "train_probe_loss": 0.0, "train_probe_num_tokens": 502235023.0, "train_probe_reward": 1.041875422000885, "train_probe_reward_std": 0.22206757217645645, "train_probe_rewards/accuracy_reward": 0.634765625, "train_probe_rewards/brier_reward": 0.8183042258024216, "train_probe_rewards/confidence_uniqueness_reward": 0.892333984375, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0014572142390534282, "train_probe_rewards/frontier_coverage_1": 0.10037144646048546, "train_probe_rewards/frontier_coverage_10": 0.10037144646048546, "train_probe_rewards/frontier_coverage_15": 0.10037144646048546, "train_probe_rewards/frontier_coverage_20": 0.07581588346511126, "train_probe_rewards/frontier_coverage_25": 0.06856801547110081, "train_probe_rewards/frontier_coverage_5": 0.10037144646048546, "train_probe_rewards/frontier_ece_reward": 0.007343103410676122, "train_probe_runtime": 17.3511, "train_probe_samples_per_second": 28.817, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4532470703125, "train_probe_signal/accuracy_reward/group_std_mean": 0.4832051396369934, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22662353515625, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22662353515625, "train_probe_signal/advantage_abs_mean": 0.20156240463256836, "train_probe_signal/advantage_pre_scale_abs_mean": 0.20156240463256836, "train_probe_signal/advantage_pre_scale_std": 0.21973057836294174, "train_probe_signal/advantage_std": 0.21973057836294174, "train_probe_signal/brier_reward/centered_abs_mean": 0.20465417951345444, "train_probe_signal/brier_reward/group_std_mean": 0.2636885643005371, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025581772439181805, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.025581772439181805, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045745849609375, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05602440424263477, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005718231201171875, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005718231201171875, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002107554581016302, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.003866996383294463, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.772522450162796e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.772522450162796e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.35481464117765427, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.46584365516901016, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006351181888021529, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006351181888021529, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.35481464117765427, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.46584365516901016, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006351181888021529, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006351181888021529, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.35481464117765427, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.46584365516901016, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006351181888021529, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.006351181888021529, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.22063589468598366, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.298631876707077, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003949382517021149, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003949382517021149, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.11186387576162815, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.14589739218354225, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020023633260279894, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020023633260279894, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.35481464117765427, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.46584365516901016, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006351181888021529, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006351181888021529, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.01217859354801476, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.015122672310099006, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001522324193501845, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001522324193501845, "train_probe_steps_per_second": 0.231 }, { "calibration/aurc": 0.29528651859816535, "calibration/batch_distribution_entropy": 0.921602674569398, "calibration/buffer_distribution_entropy": 0.9364619815831505, "calibration/confidence_entropy": 0.40908701768177347, "calibration/coverage@0%": 0.009765625, "calibration/coverage@1%": 0.009765625, "calibration/coverage@10%": 0.14453125, "calibration/coverage@15%": 0.226171875, "calibration/coverage@20%": 0.346875, "calibration/coverage@25%": 0.4140625, "calibration/coverage@30%": 0.4953125, "calibration/coverage@5%": 0.1, "calibration/ece": 0.11914970568073154, "calibration/mean_confidence": 0.5459722226119128, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 646.6, "completions/max_terminated_length": 427.0, "completions/mean_length": 160.25498046875, "completions/mean_terminated_length": 160.12051391601562, "completions/min_length": 77.0, "completions/min_terminated_length": 77.0, "epoch": 0.496, "grad_norm": 0.0011867131106555462, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 519183874.0, "reward": 1.0365911722183228, "reward_std": 0.07778663039207459, "rewards/accuracy_reward": 0.61103515625, "rewards/brier_reward": 0.8111203789710999, "rewards/confidence_uniqueness_reward": 0.9524305701255799, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0018819471122696995, "rewards/frontier_coverage_1": 0.10418144315481186, "rewards/frontier_coverage_10": 0.10418144315481186, "rewards/frontier_coverage_15": 0.10418144315481186, "rewards/frontier_coverage_20": 0.07317600697278977, "rewards/frontier_coverage_25": 0.06316131204366685, "rewards/frontier_coverage_5": 0.10418144315481186, "rewards/frontier_ece_reward": 0.006889772973954678, "signal/accuracy_reward/centered_abs_mean": 0.093475341796875, "signal/accuracy_reward/group_std_mean": 0.1266740679740906, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0467376708984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0467376708984375, "signal/advantage_abs_mean": 0.058039630949497226, "signal/advantage_pre_scale_abs_mean": 0.058039630949497226, "signal/advantage_pre_scale_std": 0.10540584474802017, "signal/advantage_std": 0.10540584474802017, "signal/brier_reward/centered_abs_mean": 0.13347503244876863, "signal/brier_reward/group_std_mean": 0.17193097174167632, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01668437905609608, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01668437905609608, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0233658567070961, "signal/confidence_uniqueness_reward/group_std_mean": 0.030634360387921333, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029207320883870127, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029207320883870127, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018023386830464006, "signal/frontier_aurc_reward/group_std_mean": 0.0031401820946484805, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.226186199754011e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.226186199754011e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1628864049911499, "signal/frontier_coverage_1/group_std_mean": 0.21382062137126923, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002915666624903679, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002915666624903679, "signal/frontier_coverage_10/centered_abs_mean": 0.1628864049911499, "signal/frontier_coverage_10/group_std_mean": 0.21382062137126923, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002915666624903679, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002915666624903679, "signal/frontier_coverage_15/centered_abs_mean": 0.1628864049911499, "signal/frontier_coverage_15/group_std_mean": 0.21382062137126923, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002915666624903679, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002915666624903679, "signal/frontier_coverage_20/centered_abs_mean": 0.10325838029384612, "signal/frontier_coverage_20/group_std_mean": 0.136458557844162, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018483249470591546, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018483249470591546, "signal/frontier_coverage_25/centered_abs_mean": 0.0682972326874733, "signal/frontier_coverage_25/group_std_mean": 0.08658059686422348, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012225204147398472, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012225204147398472, "signal/frontier_coverage_5/centered_abs_mean": 0.1628864049911499, "signal/frontier_coverage_5/group_std_mean": 0.21382062137126923, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002915666624903679, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002915666624903679, "signal/frontier_ece_reward/centered_abs_mean": 0.007767515070736408, "signal/frontier_ece_reward/group_std_mean": 0.00975795928388834, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000970939383842051, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000970939383842051, "step": 155 }, { "calibration/aurc": 0.25764350837523864, "calibration/batch_distribution_entropy": 0.9448658648442981, "calibration/buffer_distribution_entropy": 0.934086791389927, "calibration/confidence_entropy": 0.4335419018545725, "calibration/coverage@0%": 0.008993548189823874, "calibration/coverage@1%": 0.008993548189823874, "calibration/coverage@10%": 0.23697330601761254, "calibration/coverage@15%": 0.32298419153620356, "calibration/coverage@20%": 0.3902037977005871, "calibration/coverage@25%": 0.518359375, "calibration/coverage@30%": 0.597265625, "calibration/coverage@5%": 0.11814380503913893, "calibration/ece": 0.14449420275244873, "calibration/mean_confidence": 0.5363735101426749, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.6, "completions/max_terminated_length": 387.6, "completions/mean_length": 158.7716796875, "completions/mean_terminated_length": 158.7716796875, "completions/min_length": 81.2, "completions/min_terminated_length": 81.2, "epoch": 0.512, "grad_norm": 0.0010637511732056737, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 535955360.0, "reward": 1.0332459449768066, "reward_std": 0.07940305918455123, "rewards/accuracy_reward": 0.6001953125, "rewards/brier_reward": 0.8189030528068543, "rewards/confidence_uniqueness_reward": 0.9518810272216797, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.001749542192555964, "rewards/frontier_coverage_1": 0.11576533690094948, "rewards/frontier_coverage_10": 0.11576533690094948, "rewards/frontier_coverage_15": 0.11576533690094948, "rewards/frontier_coverage_20": 0.08045043498277664, "rewards/frontier_coverage_25": 0.07256748080253601, "rewards/frontier_coverage_5": 0.11576533690094948, "rewards/frontier_ece_reward": 0.007211483735591173, "signal/accuracy_reward/centered_abs_mean": 0.0985107421875, "signal/accuracy_reward/group_std_mean": 0.13301554769277574, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04925537109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04925537109375, "signal/advantage_abs_mean": 0.05924607962369919, "signal/advantage_pre_scale_abs_mean": 0.05924607962369919, "signal/advantage_pre_scale_std": 0.10775545537471772, "signal/advantage_std": 0.10775545537471772, "signal/brier_reward/centered_abs_mean": 0.12845354974269868, "signal/brier_reward/group_std_mean": 0.1667891651391983, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016056693717837335, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016056693717837335, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02351841777563095, "signal/confidence_uniqueness_reward/group_std_mean": 0.030801539495587348, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002939802221953869, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002939802221953869, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.001706664077937603, "signal/frontier_aurc_reward/group_std_mean": 0.0029680487932637334, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.054928674828261e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.054928674828261e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16018467545509338, "signal/frontier_coverage_1/group_std_mean": 0.21153274178504944, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028673056978732346, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028673056978732346, "signal/frontier_coverage_10/centered_abs_mean": 0.16018467545509338, "signal/frontier_coverage_10/group_std_mean": 0.21153274178504944, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028673056978732346, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028673056978732346, "signal/frontier_coverage_15/centered_abs_mean": 0.16018467545509338, "signal/frontier_coverage_15/group_std_mean": 0.21153274178504944, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028673056978732346, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028673056978732346, "signal/frontier_coverage_20/centered_abs_mean": 0.09345034509897232, "signal/frontier_coverage_20/group_std_mean": 0.12456403076648712, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00167276116553694, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00167276116553694, "signal/frontier_coverage_25/centered_abs_mean": 0.06614456176757813, "signal/frontier_coverage_25/group_std_mean": 0.08370408713817597, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011839876184239983, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011839876184239983, "signal/frontier_coverage_5/centered_abs_mean": 0.16018467545509338, "signal/frontier_coverage_5/group_std_mean": 0.21153274178504944, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028673056978732346, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028673056978732346, "signal/frontier_ece_reward/centered_abs_mean": 0.007462390977889299, "signal/frontier_ece_reward/group_std_mean": 0.009389066137373447, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009327988722361624, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009327988722361624, "step": 160 }, { "calibration/aurc": 0.1528418915395085, "calibration/batch_distribution_entropy": 0.9146828022473686, "calibration/buffer_distribution_entropy": 0.9326836952910534, "calibration/confidence_entropy": 0.39974539369611933, "calibration/coverage@0%": 0.02421875, "calibration/coverage@1%": 0.02421875, "calibration/coverage@10%": 0.445703125, "calibration/coverage@15%": 0.590234375, "calibration/coverage@20%": 0.705078125, "calibration/coverage@25%": 0.8046875, "calibration/coverage@30%": 0.893359375, "calibration/coverage@5%": 0.165234375, "calibration/ece": 0.1016723354137224, "calibration/mean_confidence": 0.5691894614612776, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 658.6, "completions/max_terminated_length": 433.4, "completions/mean_length": 157.12998046875, "completions/mean_terminated_length": 156.9954620361328, "completions/min_length": 78.0, "completions/min_terminated_length": 78.0, "epoch": 0.528, "grad_norm": 0.0010418170131742954, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 552593907.0, "reward": 1.0281262636184691, "reward_std": 0.0788488432765007, "rewards/accuracy_reward": 0.5865234375, "rewards/brier_reward": 0.821136748790741, "rewards/confidence_uniqueness_reward": 0.9473978161811829, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0018390015000477433, "rewards/frontier_coverage_1": 0.13756178915500641, "rewards/frontier_coverage_10": 0.13756178915500641, "rewards/frontier_coverage_15": 0.13756178915500641, "rewards/frontier_coverage_20": 0.09277231395244598, "rewards/frontier_coverage_25": 0.08103752583265304, "rewards/frontier_coverage_5": 0.13756178915500641, "rewards/frontier_ece_reward": 0.007350740581750869, "signal/accuracy_reward/centered_abs_mean": 0.10626220703125, "signal/accuracy_reward/group_std_mean": 0.13978690654039383, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053131103515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.053131103515625, "signal/advantage_abs_mean": 0.060259700566530225, "signal/advantage_pre_scale_abs_mean": 0.060259700566530225, "signal/advantage_pre_scale_std": 0.10816312432289124, "signal/advantage_std": 0.10816312432289124, "signal/brier_reward/centered_abs_mean": 0.13122203350067138, "signal/brier_reward/group_std_mean": 0.16875889003276826, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016402754187583923, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016402754187583923, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025891555473208427, "signal/confidence_uniqueness_reward/group_std_mean": 0.0340937253087759, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032364444341510534, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032364444341510534, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017435794696211814, "signal/frontier_aurc_reward/group_std_mean": 0.0029448256362229587, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.1210070665110835e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.1210070665110835e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17309306263923646, "signal/frontier_coverage_1/group_std_mean": 0.22656363546848296, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003098365804180503, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003098365804180503, "signal/frontier_coverage_10/centered_abs_mean": 0.17309306263923646, "signal/frontier_coverage_10/group_std_mean": 0.22656363546848296, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003098365804180503, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003098365804180503, "signal/frontier_coverage_15/centered_abs_mean": 0.17309306263923646, "signal/frontier_coverage_15/group_std_mean": 0.22656363546848296, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003098365804180503, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003098365804180503, "signal/frontier_coverage_20/centered_abs_mean": 0.10015229880809784, "signal/frontier_coverage_20/group_std_mean": 0.13174699544906615, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017927261302247643, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017927261302247643, "signal/frontier_coverage_25/centered_abs_mean": 0.06691559106111526, "signal/frontier_coverage_25/group_std_mean": 0.08470007181167602, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011977890972048044, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011977890972048044, "signal/frontier_coverage_5/centered_abs_mean": 0.17309306263923646, "signal/frontier_coverage_5/group_std_mean": 0.22656363546848296, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003098365804180503, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003098365804180503, "signal/frontier_ece_reward/centered_abs_mean": 0.0074427520856261255, "signal/frontier_ece_reward/group_std_mean": 0.009379717521369457, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009303440107032657, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009303440107032657, "step": 165 }, { "calibration/aurc": 0.17694588682690862, "calibration/batch_distribution_entropy": 0.8658933533472686, "calibration/buffer_distribution_entropy": 0.9297515304240533, "calibration/confidence_entropy": 0.3808740605876334, "calibration/coverage@0%": 0.019140625, "calibration/coverage@1%": 0.019140625, "calibration/coverage@10%": 0.329296875, "calibration/coverage@15%": 0.53828125, "calibration/coverage@20%": 0.686328125, "calibration/coverage@25%": 0.770703125, "calibration/coverage@30%": 0.8421875, "calibration/coverage@5%": 0.064453125, "calibration/ece": 0.07548029076484816, "calibration/mean_confidence": 0.6028442686101517, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 876.4, "completions/max_terminated_length": 432.8, "completions/mean_length": 158.95498046875, "completions/mean_terminated_length": 158.68636169433594, "completions/min_length": 79.0, "completions/min_terminated_length": 79.0, "epoch": 0.544, "grad_norm": 0.0010623829439282417, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 569385190.0, "reward": 1.0418287992477417, "reward_std": 0.08479072451591492, "rewards/accuracy_reward": 0.622265625, "rewards/brier_reward": 0.8150393009185791, "rewards/confidence_uniqueness_reward": 0.9497189283370971, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002168457116931677, "rewards/frontier_coverage_1": 0.09459788501262664, "rewards/frontier_coverage_10": 0.09459788501262664, "rewards/frontier_coverage_15": 0.09174881279468536, "rewards/frontier_coverage_20": 0.06693983972072601, "rewards/frontier_coverage_25": 0.08642307072877883, "rewards/frontier_coverage_5": 0.09459788501262664, "rewards/frontier_ece_reward": 0.006162353791296482, "signal/accuracy_reward/centered_abs_mean": 0.108935546875, "signal/accuracy_reward/group_std_mean": 0.14547575116157532, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0544677734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0544677734375, "signal/advantage_abs_mean": 0.06390135288238526, "signal/advantage_pre_scale_abs_mean": 0.06390135288238526, "signal/advantage_pre_scale_std": 0.11458454579114914, "signal/advantage_std": 0.11458454579114914, "signal/brier_reward/centered_abs_mean": 0.1370469719171524, "signal/brier_reward/group_std_mean": 0.1739386260509491, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01713087148964405, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01713087148964405, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025660135224461554, "signal/confidence_uniqueness_reward/group_std_mean": 0.0340105090290308, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032075169030576943, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032075169030576943, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002310140198096633, "signal/frontier_aurc_reward/group_std_mean": 0.003973044548183679, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.135150738875382e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.135150738875382e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16189839243888854, "signal/frontier_coverage_1/group_std_mean": 0.21171375811100007, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002897981042042375, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002897981042042375, "signal/frontier_coverage_10/centered_abs_mean": 0.16189839243888854, "signal/frontier_coverage_10/group_std_mean": 0.21171375811100007, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002897981042042375, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002897981042042375, "signal/frontier_coverage_15/centered_abs_mean": 0.15325535833835602, "signal/frontier_coverage_15/group_std_mean": 0.20059145987033844, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027432709001004698, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027432709001004698, "signal/frontier_coverage_20/centered_abs_mean": 0.09184687733650207, "signal/frontier_coverage_20/group_std_mean": 0.12045546323060989, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016440590377897024, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016440590377897024, "signal/frontier_coverage_25/centered_abs_mean": 0.07399061620235443, "signal/frontier_coverage_25/group_std_mean": 0.09224920123815536, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013244319707155228, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013244319707155228, "signal/frontier_coverage_5/centered_abs_mean": 0.16189839243888854, "signal/frontier_coverage_5/group_std_mean": 0.21171375811100007, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002897981042042375, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002897981042042375, "signal/frontier_ece_reward/centered_abs_mean": 0.007373248692601919, "signal/frontier_ece_reward/group_std_mean": 0.009202315472066402, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009216560865752399, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009216560865752399, "step": 170 }, { "calibration/aurc": 0.20591658683091413, "calibration/batch_distribution_entropy": 0.9011136319957318, "calibration/buffer_distribution_entropy": 0.9278593868874762, "calibration/confidence_entropy": 0.4000411831087075, "calibration/coverage@0%": 0.018359375, "calibration/coverage@1%": 0.018359375, "calibration/coverage@10%": 0.23671875, "calibration/coverage@15%": 0.369140625, "calibration/coverage@20%": 0.588499113258317, "calibration/coverage@25%": 0.7081213307240704, "calibration/coverage@30%": 0.7772917685909981, "calibration/coverage@5%": 0.1859375, "calibration/ece": 0.1062459463639861, "calibration/mean_confidence": 0.5850019076191907, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 876.2, "completions/max_terminated_length": 401.2, "completions/mean_length": 157.99453125, "completions/mean_terminated_length": 157.725830078125, "completions/min_length": 76.8, "completions/min_terminated_length": 76.8, "epoch": 0.56, "grad_norm": 0.0010646632872521877, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 585824462.0, "reward": 1.0233974695205688, "reward_std": 0.07817947417497635, "rewards/accuracy_reward": 0.57724609375, "rewards/brier_reward": 0.8219172954559326, "rewards/confidence_uniqueness_reward": 0.9498418092727661, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0025215481640771032, "rewards/frontier_coverage_1": 0.13713131994009017, "rewards/frontier_coverage_10": 0.13713131994009017, "rewards/frontier_coverage_15": 0.1240748941898346, "rewards/frontier_coverage_20": 0.08354234397411346, "rewards/frontier_coverage_25": 0.08381873071193695, "rewards/frontier_coverage_5": 0.13713131994009017, "rewards/frontier_ece_reward": 0.006933646369725465, "signal/accuracy_reward/centered_abs_mean": 0.086468505859375, "signal/accuracy_reward/group_std_mean": 0.1215772956609726, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0432342529296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0432342529296875, "signal/advantage_abs_mean": 0.05740503966808319, "signal/advantage_pre_scale_abs_mean": 0.05740503966808319, "signal/advantage_pre_scale_std": 0.10665770769119262, "signal/advantage_std": 0.10665770769119262, "signal/brier_reward/centered_abs_mean": 0.12967448830604553, "signal/brier_reward/group_std_mean": 0.17017283141613007, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01620931103825569, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01620931103825569, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025316498056054116, "signal/confidence_uniqueness_reward/group_std_mean": 0.033938854560256006, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031645622570067645, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031645622570067645, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025827214121818542, "signal/frontier_aurc_reward/group_std_mean": 0.004412530735135078, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6230711450334636e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6230711450334636e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14643085598945618, "signal/frontier_coverage_1/group_std_mean": 0.19515539705753326, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026211123913526535, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026211123913526535, "signal/frontier_coverage_10/centered_abs_mean": 0.14643085598945618, "signal/frontier_coverage_10/group_std_mean": 0.19515539705753326, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026211123913526535, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026211123913526535, "signal/frontier_coverage_15/centered_abs_mean": 0.12996700257062913, "signal/frontier_coverage_15/group_std_mean": 0.17408455312252044, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002326409285888076, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002326409285888076, "signal/frontier_coverage_20/centered_abs_mean": 0.07923403531312942, "signal/frontier_coverage_20/group_std_mean": 0.10590324848890305, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014182891929522157, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014182891929522157, "signal/frontier_coverage_25/centered_abs_mean": 0.07106765508651733, "signal/frontier_coverage_25/group_std_mean": 0.09031975120306016, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012721109902486204, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012721109902486204, "signal/frontier_coverage_5/centered_abs_mean": 0.14643085598945618, "signal/frontier_coverage_5/group_std_mean": 0.19515539705753326, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026211123913526535, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026211123913526535, "signal/frontier_ece_reward/centered_abs_mean": 0.006462567299604416, "signal/frontier_ece_reward/group_std_mean": 0.008214760478585959, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000807820912450552, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000807820912450552, "step": 175 }, { "calibration/aurc": 0.23616409851066922, "calibration/batch_distribution_entropy": 0.9144159502843531, "calibration/buffer_distribution_entropy": 0.9289005260583117, "calibration/confidence_entropy": 0.4101810968351833, "calibration/coverage@0%": 0.00859375, "calibration/coverage@1%": 0.00859375, "calibration/coverage@10%": 0.22890625, "calibration/coverage@15%": 0.34106158088235294, "calibration/coverage@20%": 0.46107689950980396, "calibration/coverage@25%": 0.6399234068627451, "calibration/coverage@30%": 0.7298238357843138, "calibration/coverage@5%": 0.0875, "calibration/ece": 0.10061718255943135, "calibration/mean_confidence": 0.5690684021730142, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1069.4, "completions/max_terminated_length": 392.8, "completions/mean_length": 156.8580078125, "completions/mean_terminated_length": 156.31970825195313, "completions/min_length": 78.6, "completions/min_terminated_length": 78.6, "epoch": 0.576, "grad_norm": 0.0010169014567509294, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 602617312.0, "reward": 1.0232046604156495, "reward_std": 0.07158796712756157, "rewards/accuracy_reward": 0.5810546875, "rewards/brier_reward": 0.8135895252227783, "rewards/confidence_uniqueness_reward": 0.947513747215271, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0024039767682552337, "rewards/frontier_coverage_1": 0.12831918448209761, "rewards/frontier_coverage_10": 0.12831918448209761, "rewards/frontier_coverage_15": 0.12189059555530549, "rewards/frontier_coverage_20": 0.08235756382346153, "rewards/frontier_coverage_25": 0.08532513380050659, "rewards/frontier_coverage_5": 0.12831918448209761, "rewards/frontier_ece_reward": 0.006019887700676918, "signal/accuracy_reward/centered_abs_mean": 0.076611328125, "signal/accuracy_reward/group_std_mean": 0.10829295367002487, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0383056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0383056640625, "signal/advantage_abs_mean": 0.052008964121341705, "signal/advantage_pre_scale_abs_mean": 0.052008964121341705, "signal/advantage_pre_scale_std": 0.09942405074834823, "signal/advantage_std": 0.09942405074834823, "signal/brier_reward/centered_abs_mean": 0.12525332272052764, "signal/brier_reward/group_std_mean": 0.16305108666419982, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015656665340065955, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015656665340065955, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026879063248634337, "signal/confidence_uniqueness_reward/group_std_mean": 0.036187725886702535, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003359882906079292, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003359882906079292, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_std_mean": 0.0024258273653686045, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002302885288372636, "signal/frontier_aurc_reward/group_std_mean": 0.004087219154462219, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.122164536966011e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.122164536966011e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14966692626476288, "signal/frontier_coverage_1/group_std_mean": 0.19450730979442596, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002679037814959884, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002679037814959884, "signal/frontier_coverage_10/centered_abs_mean": 0.14966692626476288, "signal/frontier_coverage_10/group_std_mean": 0.19450730979442596, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002679037814959884, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002679037814959884, "signal/frontier_coverage_15/centered_abs_mean": 0.13418073505163192, "signal/frontier_coverage_15/group_std_mean": 0.17452655732631683, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002401835098862648, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002401835098862648, "signal/frontier_coverage_20/centered_abs_mean": 0.08138690441846848, "signal/frontier_coverage_20/group_std_mean": 0.1053778126835823, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014568255050107838, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014568255050107838, "signal/frontier_coverage_25/centered_abs_mean": 0.06989559829235077, "signal/frontier_coverage_25/group_std_mean": 0.08840005397796631, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001251131179742515, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001251131179742515, "signal/frontier_coverage_5/centered_abs_mean": 0.14966692626476288, "signal/frontier_coverage_5/group_std_mean": 0.19450730979442596, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002679037814959884, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002679037814959884, "signal/frontier_ece_reward/centered_abs_mean": 0.006055058259516954, "signal/frontier_ece_reward/group_std_mean": 0.007611721567809582, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007568822824396193, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007568822824396193, "step": 180 }, { "calibration/aurc": 0.25049247038311473, "calibration/batch_distribution_entropy": 0.9216749923978981, "calibration/buffer_distribution_entropy": 0.928906031519291, "calibration/confidence_entropy": 0.4038246399408652, "calibration/coverage@0%": 0.025390625, "calibration/coverage@1%": 0.025390625, "calibration/coverage@10%": 0.19765625, "calibration/coverage@15%": 0.36015625, "calibration/coverage@20%": 0.5173961900684931, "calibration/coverage@25%": 0.6072766328277887, "calibration/coverage@30%": 0.6674695755870841, "calibration/coverage@5%": 0.10234375, "calibration/ece": 0.1129630310966048, "calibration/mean_confidence": 0.5487327841219951, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 598.2, "completions/max_terminated_length": 358.6, "completions/mean_length": 155.59599609375, "completions/mean_terminated_length": 155.46160583496095, "completions/min_length": 74.0, "completions/min_terminated_length": 74.0, "epoch": 0.592, "grad_norm": 0.0011274260468780994, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 619378327.0, "reward": 1.0228557944297791, "reward_std": 0.07906165421009063, "rewards/accuracy_reward": 0.5802734375, "rewards/brier_reward": 0.8147315979003906, "rewards/confidence_uniqueness_reward": 0.9459418058395386, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0021479753311723472, "rewards/frontier_coverage_1": 0.1302879810333252, "rewards/frontier_coverage_10": 0.1302879810333252, "rewards/frontier_coverage_15": 0.1160609021782875, "rewards/frontier_coverage_20": 0.0786726415157318, "rewards/frontier_coverage_25": 0.08592544496059418, "rewards/frontier_coverage_5": 0.1302879810333252, "rewards/frontier_ece_reward": 0.006006046012043953, "signal/accuracy_reward/centered_abs_mean": 0.1034423828125, "signal/accuracy_reward/group_std_mean": 0.13525836020708085, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05172119140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05172119140625, "signal/advantage_abs_mean": 0.0604724645614624, "signal/advantage_pre_scale_abs_mean": 0.0604724645614624, "signal/advantage_pre_scale_std": 0.10902182012796402, "signal/advantage_std": 0.10902182012796402, "signal/brier_reward/centered_abs_mean": 0.12949165105819702, "signal/brier_reward/group_std_mean": 0.16734184324741364, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016186456382274627, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016186456382274627, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027432877197861673, "signal/confidence_uniqueness_reward/group_std_mean": 0.03624261319637299, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003429109649732709, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003429109649732709, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021959642181172967, "signal/frontier_aurc_reward/group_std_mean": 0.003895052522420883, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.930775710614398e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.930775710614398e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16536442041397095, "signal/frontier_coverage_1/group_std_mean": 0.21532918214797975, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029600230976939202, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029600230976939202, "signal/frontier_coverage_10/centered_abs_mean": 0.16536442041397095, "signal/frontier_coverage_10/group_std_mean": 0.21532918214797975, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029600230976939202, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029600230976939202, "signal/frontier_coverage_15/centered_abs_mean": 0.14321968853473663, "signal/frontier_coverage_15/group_std_mean": 0.1867223024368286, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002563632372766733, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002563632372766733, "signal/frontier_coverage_20/centered_abs_mean": 0.0868200957775116, "signal/frontier_coverage_20/group_std_mean": 0.11267746090888978, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015540797030553222, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015540797030553222, "signal/frontier_coverage_25/centered_abs_mean": 0.07138665020465851, "signal/frontier_coverage_25/group_std_mean": 0.0901971310377121, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012778210220858455, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012778210220858455, "signal/frontier_coverage_5/centered_abs_mean": 0.16536442041397095, "signal/frontier_coverage_5/group_std_mean": 0.21532918214797975, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029600230976939202, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029600230976939202, "signal/frontier_ece_reward/centered_abs_mean": 0.006090964470058679, "signal/frontier_ece_reward/group_std_mean": 0.00773719884455204, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007613705587573349, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007613705587573349, "step": 185 }, { "calibration/aurc": 0.18192419947165778, "calibration/batch_distribution_entropy": 0.8738511825598287, "calibration/buffer_distribution_entropy": 0.9289629376118806, "calibration/confidence_entropy": 0.3687290534199558, "calibration/coverage@0%": 0.10390625, "calibration/coverage@1%": 0.1578125, "calibration/coverage@10%": 0.44656846257338556, "calibration/coverage@15%": 0.5469736117906067, "calibration/coverage@20%": 0.6266771648727985, "calibration/coverage@25%": 0.7106829439823874, "calibration/coverage@30%": 0.7673380931996086, "calibration/coverage@5%": 0.3422218994618395, "calibration/ece": 0.10703189610363692, "calibration/mean_confidence": 0.5199164274477053, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 609.0, "completions/max_terminated_length": 381.8, "completions/mean_length": 156.66416015625, "completions/mean_terminated_length": 156.52974548339844, "completions/min_length": 75.8, "completions/min_terminated_length": 75.8, "epoch": 0.608, "grad_norm": 0.0009081127354875207, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 635982056.0, "reward": 1.025065505504608, "reward_std": 0.06199713125824928, "rewards/accuracy_reward": 0.571484375, "rewards/brier_reward": 0.8416186571121216, "rewards/confidence_uniqueness_reward": 0.9414823293685913, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.001796682784333825, "rewards/frontier_coverage_1": 0.17411440312862397, "rewards/frontier_coverage_10": 0.17411440312862397, "rewards/frontier_coverage_15": 0.14766598343849183, "rewards/frontier_coverage_20": 0.10131891369819641, "rewards/frontier_coverage_25": 0.10434879511594772, "rewards/frontier_coverage_5": 0.17411440312862397, "rewards/frontier_ece_reward": 0.006736797094345093, "signal/accuracy_reward/centered_abs_mean": 0.0841552734375, "signal/accuracy_reward/group_std_mean": 0.11044697016477585, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04207763671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04207763671875, "signal/advantage_abs_mean": 0.04641749858856201, "signal/advantage_pre_scale_abs_mean": 0.04641749858856201, "signal/advantage_pre_scale_std": 0.09046411365270615, "signal/advantage_std": 0.09046411365270615, "signal/brier_reward/centered_abs_mean": 0.11504580080509186, "signal/brier_reward/group_std_mean": 0.14834731221199035, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014380725100636482, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014380725100636482, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0289421908557415, "signal/confidence_uniqueness_reward/group_std_mean": 0.03680059537291527, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036177738569676877, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036177738569676877, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018506290158256888, "signal/frontier_aurc_reward/group_std_mean": 0.0032447043806314467, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.31262570398394e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.31262570398394e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16321699619293212, "signal/frontier_coverage_1/group_std_mean": 0.2083958327770233, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029215840622782707, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029215840622782707, "signal/frontier_coverage_10/centered_abs_mean": 0.16321699619293212, "signal/frontier_coverage_10/group_std_mean": 0.2083958327770233, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029215840622782707, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029215840622782707, "signal/frontier_coverage_15/centered_abs_mean": 0.13163567185401917, "signal/frontier_coverage_15/group_std_mean": 0.16858604550361633, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023562783375382424, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023562783375382424, "signal/frontier_coverage_20/centered_abs_mean": 0.08355329185724258, "signal/frontier_coverage_20/group_std_mean": 0.10619560033082961, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014956038678064943, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014956038678064943, "signal/frontier_coverage_25/centered_abs_mean": 0.06687505841255188, "signal/frontier_coverage_25/group_std_mean": 0.08423561900854111, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011970635503530502, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011970635503530502, "signal/frontier_coverage_5/centered_abs_mean": 0.16321699619293212, "signal/frontier_coverage_5/group_std_mean": 0.2083958327770233, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029215840622782707, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029215840622782707, "signal/frontier_ece_reward/centered_abs_mean": 0.005484546534717083, "signal/frontier_ece_reward/group_std_mean": 0.006870439555495977, "signal/frontier_ece_reward/group_zero_std_frac": 0.009375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006855683168396354, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006855683168396354, "step": 190 }, { "calibration/aurc": 0.1971963236746678, "calibration/batch_distribution_entropy": 0.9205140143105222, "calibration/buffer_distribution_entropy": 0.9295399279070912, "calibration/confidence_entropy": 0.40455422987883516, "calibration/coverage@0%": 0.0234375, "calibration/coverage@1%": 0.061328125, "calibration/coverage@10%": 0.33671875, "calibration/coverage@15%": 0.4609375, "calibration/coverage@20%": 0.562890625, "calibration/coverage@25%": 0.635546875, "calibration/coverage@30%": 0.78515625, "calibration/coverage@5%": 0.2, "calibration/ece": 0.09562349489051207, "calibration/mean_confidence": 0.524824910104146, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 385.6, "completions/max_terminated_length": 385.6, "completions/mean_length": 158.17109375, "completions/mean_terminated_length": 158.17109375, "completions/min_length": 75.4, "completions/min_terminated_length": 75.4, "epoch": 0.624, "grad_norm": 0.001079601002857089, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 652945632.0, "reward": 1.0264529228210448, "reward_std": 0.0716327577829361, "rewards/accuracy_reward": 0.57841796875, "rewards/brier_reward": 0.8322904944419861, "rewards/confidence_uniqueness_reward": 0.9480019688606263, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00190048823133111, "rewards/frontier_coverage_1": 0.15654837489128112, "rewards/frontier_coverage_10": 0.15654837489128112, "rewards/frontier_coverage_15": 0.13024692088365555, "rewards/frontier_coverage_20": 0.09054728597402573, "rewards/frontier_coverage_25": 0.09681654870510101, "rewards/frontier_coverage_5": 0.15654837489128112, "rewards/frontier_ece_reward": 0.005586811527609825, "signal/accuracy_reward/centered_abs_mean": 0.097406005859375, "signal/accuracy_reward/group_std_mean": 0.12599362283945084, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0487030029296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0487030029296875, "signal/advantage_abs_mean": 0.055555340647697446, "signal/advantage_pre_scale_abs_mean": 0.055555340647697446, "signal/advantage_pre_scale_std": 0.10187341719865799, "signal/advantage_std": 0.10187341719865799, "signal/brier_reward/centered_abs_mean": 0.12212891280651092, "signal/brier_reward/group_std_mean": 0.15707454681396485, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015266114100813865, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015266114100813865, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024161863327026366, "signal/confidence_uniqueness_reward/group_std_mean": 0.03125019893050194, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003020232915878296, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003020232915878296, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.001966876885853708, "signal/frontier_aurc_reward/group_std_mean": 0.003790367441251874, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.5207096880185416e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.5207096880185416e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.166505765914917, "signal/frontier_coverage_1/group_std_mean": 0.2163769483566284, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00298045314848423, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00298045314848423, "signal/frontier_coverage_10/centered_abs_mean": 0.166505765914917, "signal/frontier_coverage_10/group_std_mean": 0.2163769483566284, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00298045314848423, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00298045314848423, "signal/frontier_coverage_15/centered_abs_mean": 0.12933739721775056, "signal/frontier_coverage_15/group_std_mean": 0.16911623477935792, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002315139351412654, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002315139351412654, "signal/frontier_coverage_20/centered_abs_mean": 0.0790199413895607, "signal/frontier_coverage_20/group_std_mean": 0.10347112566232682, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014144569169729948, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014144569169729948, "signal/frontier_coverage_25/centered_abs_mean": 0.06764063239097595, "signal/frontier_coverage_25/group_std_mean": 0.08631972819566727, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012107673101127148, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012107673101127148, "signal/frontier_coverage_5/centered_abs_mean": 0.166505765914917, "signal/frontier_coverage_5/group_std_mean": 0.2163769483566284, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00298045314848423, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00298045314848423, "signal/frontier_ece_reward/centered_abs_mean": 0.005112008564174176, "signal/frontier_ece_reward/group_std_mean": 0.006569109484553337, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000639001070521772, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000639001070521772, "step": 195 }, { "calibration/aurc": 0.195020748165163, "calibration/batch_distribution_entropy": 0.8922545357396509, "calibration/buffer_distribution_entropy": 0.9305442441617402, "calibration/confidence_entropy": 0.3912324496194214, "calibration/coverage@0%": 0.044140625, "calibration/coverage@1%": 0.044140625, "calibration/coverage@10%": 0.32421875, "calibration/coverage@15%": 0.473046875, "calibration/coverage@20%": 0.556640625, "calibration/coverage@25%": 0.665234375, "calibration/coverage@30%": 0.76875, "calibration/coverage@5%": 0.233984375, "calibration/ece": 0.12871549991004977, "calibration/mean_confidence": 0.5960780698014887, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 374.2, "completions/max_terminated_length": 374.2, "completions/mean_length": 161.2595703125, "completions/mean_terminated_length": 161.2595703125, "completions/min_length": 79.6, "completions/min_terminated_length": 79.6, "epoch": 0.64, "grad_norm": 0.0008534787921234965, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 669939618.0, "reward": 1.0402125120162964, "reward_std": 0.0648931972682476, "rewards/accuracy_reward": 0.613671875, "rewards/brier_reward": 0.8254677057266235, "rewards/confidence_uniqueness_reward": 0.946368408203125, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002193172019906342, "rewards/frontier_coverage_1": 0.11940560638904571, "rewards/frontier_coverage_10": 0.11940560638904571, "rewards/frontier_coverage_15": 0.09389316588640213, "rewards/frontier_coverage_20": 0.07351961880922317, "rewards/frontier_coverage_25": 0.10749737620353698, "rewards/frontier_coverage_5": 0.11940560638904571, "rewards/frontier_ece_reward": 0.004826861340552569, "signal/accuracy_reward/centered_abs_mean": 0.07481689453125, "signal/accuracy_reward/group_std_mean": 0.10642163604497909, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037408447265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.037408447265625, "signal/advantage_abs_mean": 0.04719259664416313, "signal/advantage_pre_scale_abs_mean": 0.04719259664416313, "signal/advantage_pre_scale_std": 0.0948547139763832, "signal/advantage_std": 0.0948547139763832, "signal/brier_reward/centered_abs_mean": 0.11356604993343353, "signal/brier_reward/group_std_mean": 0.14705831706523895, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014195756241679191, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014195756241679191, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025693368911743165, "signal/confidence_uniqueness_reward/group_std_mean": 0.03297973945736885, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032116711139678956, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032116711139678956, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002536199474707246, "signal/frontier_aurc_reward/group_std_mean": 0.004635827429592609, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.5397969006444325e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.5397969006444325e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13650378882884978, "signal/frontier_coverage_1/group_std_mean": 0.1806756556034088, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024434176739305258, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024434176739305258, "signal/frontier_coverage_10/centered_abs_mean": 0.13650378882884978, "signal/frontier_coverage_10/group_std_mean": 0.1806756556034088, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024434176739305258, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024434176739305258, "signal/frontier_coverage_15/centered_abs_mean": 0.09933190941810607, "signal/frontier_coverage_15/group_std_mean": 0.13171655982732772, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017780411522835492, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017780411522835492, "signal/frontier_coverage_20/centered_abs_mean": 0.06690454185009002, "signal/frontier_coverage_20/group_std_mean": 0.0870763123035431, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011975912610068917, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011975912610068917, "signal/frontier_coverage_25/centered_abs_mean": 0.07027508169412613, "signal/frontier_coverage_25/group_std_mean": 0.09005680382251739, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012579238740727306, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012579238740727306, "signal/frontier_coverage_5/centered_abs_mean": 0.13650378882884978, "signal/frontier_coverage_5/group_std_mean": 0.1806756556034088, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024434176739305258, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024434176739305258, "signal/frontier_ece_reward/centered_abs_mean": 0.004354535695165395, "signal/frontier_ece_reward/group_std_mean": 0.005637980904430151, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005443169618956744, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005443169618956744, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.46342354183372153, "eval_calibration/batch_distribution_entropy": 0.8267386701063087, "eval_calibration/buffer_distribution_entropy": 0.9303503713284912, "eval_calibration/confidence_entropy": 0.36764638225435886, "eval_calibration/coverage@0%": 0.1015625, "eval_calibration/coverage@1%": 0.1015625, "eval_calibration/coverage@10%": 0.1171875, "eval_calibration/coverage@15%": 0.140625, "eval_calibration/coverage@20%": 0.1484375, "eval_calibration/coverage@25%": 0.2421875, "eval_calibration/coverage@30%": 0.3125, "eval_calibration/coverage@5%": 0.1015625, "eval_calibration/ece": 0.26115843455188675, "eval_calibration/mean_confidence": 0.5374084345518868, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 338.5, "eval_completions/max_terminated_length": 338.5, "eval_completions/mean_length": 165.4894256591797, "eval_completions/mean_terminated_length": 165.4894256591797, "eval_completions/min_length": 96.0, "eval_completions/min_terminated_length": 96.0, "eval_loss": 0.0, "eval_num_tokens": 669939618.0, "eval_reward": 0.9351394772529602, "eval_reward_std": 0.24361010268330574, "eval_rewards/accuracy_reward": 0.41015625, "eval_rewards/brier_reward": 0.7876222133636475, "eval_rewards/confidence_uniqueness_reward": 0.893798828125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.005775218247435987, "eval_rewards/frontier_coverage_1": 0.24114028364419937, "eval_rewards/frontier_coverage_10": 0.24114028364419937, "eval_rewards/frontier_coverage_15": 0.1763102523982525, "eval_rewards/frontier_coverage_20": 0.10979359783232212, "eval_rewards/frontier_coverage_25": 0.06296418234705925, "eval_rewards/frontier_coverage_5": 0.24114028364419937, "eval_rewards/frontier_ece_reward": 0.006316208629868925, "eval_runtime": 18.1608, "eval_samples_per_second": 27.532, "eval_signal/accuracy_reward/centered_abs_mean": 0.462158203125, "eval_signal/accuracy_reward/group_std_mean": 0.4877973794937134, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2310791015625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2310791015625, "eval_signal/advantage_abs_mean": 0.22277260944247246, "eval_signal/advantage_pre_scale_abs_mean": 0.22277260944247246, "eval_signal/advantage_pre_scale_std": 0.24095501005649567, "eval_signal/advantage_std": 0.24095501005649567, "eval_signal/brier_reward/centered_abs_mean": 0.25160689651966095, "eval_signal/brier_reward/group_std_mean": 0.3050123006105423, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03145086206495762, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.03145086206495762, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0444183349609375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05229387618601322, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0055522918701171875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0055522918701171875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.008398046251386404, "eval_signal/frontier_aurc_reward/group_std_mean": 0.016706117428839207, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00015032502415124327, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00015032502415124327, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3698094040155411, "eval_signal/frontier_coverage_1/group_std_mean": 0.43596525490283966, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006619588239118457, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006619588239118457, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3698094040155411, "eval_signal/frontier_coverage_10/group_std_mean": 0.43596525490283966, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006619588239118457, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006619588239118457, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2656380385160446, "eval_signal/frontier_coverage_15/group_std_mean": 0.31497038900852203, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004754920839332044, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004754920839332044, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.15232503414154053, "eval_signal/frontier_coverage_20/group_std_mean": 0.1873607039451599, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002726617909502238, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002726617909502238, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.14092491567134857, "eval_signal/frontier_coverage_25/group_std_mean": 0.18245521932840347, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025225559365935624, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025225559365935624, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3698094040155411, "eval_signal/frontier_coverage_5/group_std_mean": 0.43596525490283966, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006619588239118457, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006619588239118457, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.008692699484527111, "eval_signal/frontier_ece_reward/group_std_mean": 0.010699421167373657, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0010865874355658889, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0010865874355658889, "eval_steps_per_second": 0.22, "step": 200 }, { "epoch": 0.64, "step": 200, "train_probe_calibration/aurc": 0.18443769404155097, "train_probe_calibration/batch_distribution_entropy": 0.8015818144093043, "train_probe_calibration/buffer_distribution_entropy": 0.9300096207172028, "train_probe_calibration/confidence_entropy": 0.3722767016939027, "train_probe_calibration/coverage@0%": 0.2890625, "train_probe_calibration/coverage@1%": 0.2890625, "train_probe_calibration/coverage@10%": 0.3515625, "train_probe_calibration/coverage@15%": 0.5078125, "train_probe_calibration/coverage@20%": 0.6015625, "train_probe_calibration/coverage@25%": 0.640625, "train_probe_calibration/coverage@30%": 0.7578125, "train_probe_calibration/coverage@5%": 0.2890625, "train_probe_calibration/ece": 0.17925781250000003, "train_probe_calibration/mean_confidence": 0.6127734375, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 298.5, "train_probe_completions/max_terminated_length": 298.5, "train_probe_completions/mean_length": 161.12843322753906, "train_probe_completions/mean_terminated_length": 161.12843322753906, "train_probe_completions/min_length": 94.5, "train_probe_completions/min_terminated_length": 94.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 669939618.0, "train_probe_reward": 1.06145441532135, "train_probe_reward_std": 0.22316357120871544, "train_probe_rewards/accuracy_reward": 0.66796875, "train_probe_rewards/brier_reward": 0.8423600494861603, "train_probe_rewards/confidence_uniqueness_reward": 0.89111328125, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0010826691941474564, "train_probe_rewards/frontier_coverage_1": 0.0965785188600421, "train_probe_rewards/frontier_coverage_10": 0.0965785188600421, "train_probe_rewards/frontier_coverage_15": 0.07919098529964685, "train_probe_rewards/frontier_coverage_20": 0.07169051561504602, "train_probe_rewards/frontier_coverage_25": 0.13048473186790943, "train_probe_rewards/frontier_coverage_5": 0.0965785188600421, "train_probe_rewards/frontier_ece_reward": 0.0046604592353105545, "train_probe_runtime": 17.1156, "train_probe_samples_per_second": 29.213, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.43115234375, "train_probe_signal/accuracy_reward/group_std_mean": 0.47117266058921814, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.215576171875, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.215576171875, "train_probe_signal/advantage_abs_mean": 0.19968737289309502, "train_probe_signal/advantage_pre_scale_abs_mean": 0.19968737289309502, "train_probe_signal/advantage_pre_scale_std": 0.22083420678973198, "train_probe_signal/advantage_std": 0.22083420678973198, "train_probe_signal/brier_reward/centered_abs_mean": 0.1940205954015255, "train_probe_signal/brier_reward/group_std_mean": 0.2574399895966053, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024252574425190687, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.024252574425190687, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047943115234375, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05579993408173323, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005992889404296875, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005992889404296875, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0022220485552679747, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.0040761920099612325, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.977466849391931e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.977466849391931e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.31603457778692245, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.44148707389831543, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005657018744386733, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005657018744386733, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.31603457778692245, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.44148707389831543, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005657018744386733, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005657018744386733, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.22388429939746857, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.31913936883211136, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004007528768852353, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004007528768852353, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.12689215876162052, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.18408489972352982, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002271369507070631, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002271369507070631, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.13436606898903847, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.1626235581934452, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024051525979302824, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024051525979302824, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.31603457778692245, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.44148707389831543, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005657018744386733, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005657018744386733, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.007275205687619746, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.009898353135213256, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009094007109524682, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009094007109524682, "train_probe_steps_per_second": 0.234 }, { "calibration/aurc": 0.29317397217948216, "calibration/batch_distribution_entropy": 0.9201408367606426, "calibration/buffer_distribution_entropy": 0.9305788418920775, "calibration/confidence_entropy": 0.41399374057065597, "calibration/coverage@0%": 0.01328125, "calibration/coverage@1%": 0.01328125, "calibration/coverage@10%": 0.093359375, "calibration/coverage@15%": 0.15234375, "calibration/coverage@20%": 0.32890625, "calibration/coverage@25%": 0.441015625, "calibration/coverage@30%": 0.597265625, "calibration/coverage@5%": 0.063671875, "calibration/ece": 0.13287014590320417, "calibration/mean_confidence": 0.5697344575082802, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.8, "completions/max_terminated_length": 395.8, "completions/mean_length": 164.48642578125, "completions/mean_terminated_length": 164.48642578125, "completions/min_length": 82.6, "completions/min_terminated_length": 82.6, "epoch": 0.656, "grad_norm": 0.0009725289419293404, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 686480503.0, "reward": 1.0179179072380067, "reward_std": 0.07355367988348008, "rewards/accuracy_reward": 0.57080078125, "rewards/brier_reward": 0.8100694179534912, "rewards/confidence_uniqueness_reward": 0.9493492126464844, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.00356218283995986, "rewards/frontier_coverage_1": 0.13388518393039703, "rewards/frontier_coverage_10": 0.13388518393039703, "rewards/frontier_coverage_15": 0.10320408940315247, "rewards/frontier_coverage_20": 0.07639760747551919, "rewards/frontier_coverage_25": 0.09401055723428726, "rewards/frontier_coverage_5": 0.13388518393039703, "rewards/frontier_ece_reward": 0.004533285135403275, "signal/accuracy_reward/centered_abs_mean": 0.086358642578125, "signal/accuracy_reward/group_std_mean": 0.1176445797085762, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0431793212890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0431793212890625, "signal/advantage_abs_mean": 0.05536918267607689, "signal/advantage_pre_scale_abs_mean": 0.05536918267607689, "signal/advantage_pre_scale_std": 0.10483470559120178, "signal/advantage_std": 0.10483470559120178, "signal/brier_reward/centered_abs_mean": 0.12581576704978942, "signal/brier_reward/group_std_mean": 0.16321150958538055, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015726970881223677, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015726970881223677, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023859953880310057, "signal/confidence_uniqueness_reward/group_std_mean": 0.03090248741209507, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002982494235038757, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002982494235038757, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.004037552513182163, "signal/frontier_aurc_reward/group_std_mean": 0.006974977813661099, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.227218957268633e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.227218957268633e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14410681128501893, "signal/frontier_coverage_1/group_std_mean": 0.19003032743930817, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002579511888325214, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002579511888325214, "signal/frontier_coverage_10/centered_abs_mean": 0.14410681128501893, "signal/frontier_coverage_10/group_std_mean": 0.19003032743930817, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002579511888325214, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002579511888325214, "signal/frontier_coverage_15/centered_abs_mean": 0.10301252007484436, "signal/frontier_coverage_15/group_std_mean": 0.13664860129356385, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018439240287989379, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018439240287989379, "signal/frontier_coverage_20/centered_abs_mean": 0.06870696991682053, "signal/frontier_coverage_20/group_std_mean": 0.08993822485208511, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001229854696430266, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001229854696430266, "signal/frontier_coverage_25/centered_abs_mean": 0.07536256462335586, "signal/frontier_coverage_25/group_std_mean": 0.09631493389606476, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013489898992702365, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013489898992702365, "signal/frontier_coverage_5/centered_abs_mean": 0.14410681128501893, "signal/frontier_coverage_5/group_std_mean": 0.19003032743930817, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002579511888325214, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002579511888325214, "signal/frontier_ece_reward/centered_abs_mean": 0.004476304817944765, "signal/frontier_ece_reward/group_std_mean": 0.00577198239043355, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005595381022430957, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005595381022430957, "step": 205 }, { "calibration/aurc": 0.23434450674410473, "calibration/batch_distribution_entropy": 0.882002562151535, "calibration/buffer_distribution_entropy": 0.9311633807634617, "calibration/confidence_entropy": 0.37414299995746625, "calibration/coverage@0%": 0.0328125, "calibration/coverage@1%": 0.0328125, "calibration/coverage@10%": 0.23125, "calibration/coverage@15%": 0.301953125, "calibration/coverage@20%": 0.412109375, "calibration/coverage@25%": 0.5375, "calibration/coverage@30%": 0.68828125, "calibration/coverage@5%": 0.071484375, "calibration/ece": 0.12175364717200503, "calibration/mean_confidence": 0.5939089459842449, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 393.4, "completions/max_terminated_length": 393.4, "completions/mean_length": 165.54716796875, "completions/mean_terminated_length": 165.54716796875, "completions/min_length": 81.8, "completions/min_terminated_length": 81.8, "epoch": 0.672, "grad_norm": 0.000927310436964035, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 703089146.0, "reward": 1.024551224708557, "reward_std": 0.069550159573555, "rewards/accuracy_reward": 0.57529296875, "rewards/brier_reward": 0.8332065463066101, "rewards/confidence_uniqueness_reward": 0.9376014709472656, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0033317374996840953, "rewards/frontier_coverage_1": 0.16638074517250062, "rewards/frontier_coverage_10": 0.16638074517250062, "rewards/frontier_coverage_15": 0.12547548562288285, "rewards/frontier_coverage_20": 0.09414769113063812, "rewards/frontier_coverage_25": 0.11692911386489868, "rewards/frontier_coverage_5": 0.16638074517250062, "rewards/frontier_ece_reward": 0.005235725268721581, "signal/accuracy_reward/centered_abs_mean": 0.088909912109375, "signal/accuracy_reward/group_std_mean": 0.11973689049482346, "signal/accuracy_reward/group_zero_std_frac": 0.65, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0444549560546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0444549560546875, "signal/advantage_abs_mean": 0.05134270042181015, "signal/advantage_pre_scale_abs_mean": 0.05134270042181015, "signal/advantage_pre_scale_std": 0.09901983886957169, "signal/advantage_std": 0.09901983886957169, "signal/brier_reward/centered_abs_mean": 0.12112097889184952, "signal/brier_reward/group_std_mean": 0.15756649971008302, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01514012236148119, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01514012236148119, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031430721282958984, "signal/confidence_uniqueness_reward/group_std_mean": 0.03886085823178291, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003928840160369873, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003928840160369873, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.00404848949983716, "signal/frontier_aurc_reward/group_std_mean": 0.0072443762794137, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.246795867104084e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.246795867104084e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1571286678314209, "signal/frontier_coverage_1/group_std_mean": 0.20347483158111573, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028126030694693325, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028126030694693325, "signal/frontier_coverage_10/centered_abs_mean": 0.1571286678314209, "signal/frontier_coverage_10/group_std_mean": 0.20347483158111573, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028126030694693325, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028126030694693325, "signal/frontier_coverage_15/centered_abs_mean": 0.11069501340389251, "signal/frontier_coverage_15/group_std_mean": 0.14373029470443727, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001981440628878772, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001981440628878772, "signal/frontier_coverage_20/centered_abs_mean": 0.07589569091796874, "signal/frontier_coverage_20/group_std_mean": 0.09735166430473327, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013585327193140983, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013585327193140983, "signal/frontier_coverage_25/centered_abs_mean": 0.0746377795934677, "signal/frontier_coverage_25/group_std_mean": 0.09605260342359542, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013360162265598774, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013360162265598774, "signal/frontier_coverage_5/centered_abs_mean": 0.1571286678314209, "signal/frontier_coverage_5/group_std_mean": 0.20347483158111573, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028126030694693325, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028126030694693325, "signal/frontier_ece_reward/centered_abs_mean": 0.004420119524002075, "signal/frontier_ece_reward/group_std_mean": 0.005659045279026031, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005525149405002594, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005525149405002594, "step": 210 }, { "calibration/aurc": 0.22316816925244712, "calibration/batch_distribution_entropy": 0.8631638001153454, "calibration/buffer_distribution_entropy": 0.9303134498916309, "calibration/confidence_entropy": 0.36794004434092653, "calibration/coverage@0%": 0.0859375, "calibration/coverage@1%": 0.109375, "calibration/coverage@10%": 0.277734375, "calibration/coverage@15%": 0.41875, "calibration/coverage@20%": 0.578515625, "calibration/coverage@25%": 0.65, "calibration/coverage@30%": 0.718359375, "calibration/coverage@5%": 0.208984375, "calibration/ece": 0.11836120683661663, "calibration/mean_confidence": 0.5856790014967167, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 431.6, "completions/max_terminated_length": 431.6, "completions/mean_length": 171.991015625, "completions/mean_terminated_length": 171.991015625, "completions/min_length": 84.4, "completions/min_terminated_length": 84.4, "epoch": 0.688, "grad_norm": 0.0016493016155436635, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 719804254.0, "reward": 1.035550093650818, "reward_std": 0.07330326288938523, "rewards/accuracy_reward": 0.6015625, "rewards/brier_reward": 0.8307091474533081, "rewards/confidence_uniqueness_reward": 0.9405294418334961, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002609227574430406, "rewards/frontier_coverage_1": 0.13768716901540756, "rewards/frontier_coverage_10": 0.13768716901540756, "rewards/frontier_coverage_15": 0.10304213985800743, "rewards/frontier_coverage_20": 0.08185177743434906, "rewards/frontier_coverage_25": 0.12052069902420044, "rewards/frontier_coverage_5": 0.13768716901540756, "rewards/frontier_ece_reward": 0.004790456034243107, "signal/accuracy_reward/centered_abs_mean": 0.0959228515625, "signal/accuracy_reward/group_std_mean": 0.13043897598981857, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04796142578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04796142578125, "signal/advantage_abs_mean": 0.05425951853394508, "signal/advantage_pre_scale_abs_mean": 0.05425951853394508, "signal/advantage_pre_scale_std": 0.1053330883383751, "signal/advantage_std": 0.1053330883383751, "signal/brier_reward/centered_abs_mean": 0.11450777053833008, "signal/brier_reward/group_std_mean": 0.15139109492301941, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01431347131729126, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01431347131729126, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02984708845615387, "signal/confidence_uniqueness_reward/group_std_mean": 0.03796382881700992, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0037308860570192336, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037308860570192336, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028274263255298137, "signal/frontier_aurc_reward/group_std_mean": 0.004675904382020235, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.06109277921496e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.06109277921496e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14792871475219727, "signal/frontier_coverage_1/group_std_mean": 0.1949632316827774, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002647924004122615, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002647924004122615, "signal/frontier_coverage_10/centered_abs_mean": 0.14792871475219727, "signal/frontier_coverage_10/group_std_mean": 0.1949632316827774, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002647924004122615, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002647924004122615, "signal/frontier_coverage_15/centered_abs_mean": 0.10406249761581421, "signal/frontier_coverage_15/group_std_mean": 0.1370965600013733, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018627186771482229, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018627186771482229, "signal/frontier_coverage_20/centered_abs_mean": 0.07288601100444794, "signal/frontier_coverage_20/group_std_mean": 0.09395883530378342, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013046595733612776, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013046595733612776, "signal/frontier_coverage_25/centered_abs_mean": 0.07228792309761048, "signal/frontier_coverage_25/group_std_mean": 0.09277454912662506, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012939537642523645, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012939537642523645, "signal/frontier_coverage_5/centered_abs_mean": 0.14792871475219727, "signal/frontier_coverage_5/group_std_mean": 0.1949632316827774, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002647924004122615, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002647924004122615, "signal/frontier_ece_reward/centered_abs_mean": 0.004083223734050989, "signal/frontier_ece_reward/group_std_mean": 0.0053210449405014515, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005104029667563736, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005104029667563736, "step": 215 }, { "calibration/aurc": 0.16011508519990608, "calibration/batch_distribution_entropy": 0.8181701793524606, "calibration/buffer_distribution_entropy": 0.928059463965063, "calibration/confidence_entropy": 0.3376720196263639, "calibration/coverage@0%": 0.01796875, "calibration/coverage@1%": 0.01796875, "calibration/coverage@10%": 0.36796875, "calibration/coverage@15%": 0.621484375, "calibration/coverage@20%": 0.724609375, "calibration/coverage@25%": 0.799609375, "calibration/coverage@30%": 0.86796875, "calibration/coverage@5%": 0.21015625, "calibration/ece": 0.08461852811120732, "calibration/mean_confidence": 0.6100572855360106, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 441.6, "completions/max_terminated_length": 441.6, "completions/mean_length": 173.48193359375, "completions/mean_terminated_length": 173.48193359375, "completions/min_length": 87.4, "completions/min_terminated_length": 87.4, "epoch": 0.704, "grad_norm": 0.0007582867401652038, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 736446853.0, "reward": 1.0405026197433471, "reward_std": 0.06702196821570397, "rewards/accuracy_reward": 0.6091796875, "rewards/brier_reward": 0.8383830785751343, "rewards/confidence_uniqueness_reward": 0.9407512187957764, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00258009375538677, "rewards/frontier_coverage_1": 0.13729006946086883, "rewards/frontier_coverage_10": 0.13729006946086883, "rewards/frontier_coverage_15": 0.10272664576768875, "rewards/frontier_coverage_20": 0.08358165025711059, "rewards/frontier_coverage_25": 0.12972914576530456, "rewards/frontier_coverage_5": 0.13729006946086883, "rewards/frontier_ece_reward": 0.004691596981137991, "signal/accuracy_reward/centered_abs_mean": 0.08326416015625, "signal/accuracy_reward/group_std_mean": 0.10747589468955994, "signal/accuracy_reward/group_zero_std_frac": 0.7, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041632080078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.041632080078125, "signal/advantage_abs_mean": 0.051660557836294176, "signal/advantage_pre_scale_abs_mean": 0.051660557836294176, "signal/advantage_pre_scale_std": 0.09930311441421509, "signal/advantage_std": 0.09930311441421509, "signal/brier_reward/centered_abs_mean": 0.11549538522958755, "signal/brier_reward/group_std_mean": 0.15128278136253356, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014436923153698444, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014436923153698444, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02790890485048294, "signal/confidence_uniqueness_reward/group_std_mean": 0.035958658903837204, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034886131063103674, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034886131063103674, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028505324851721527, "signal/frontier_aurc_reward/group_std_mean": 0.004907863447442651, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.102453142171726e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.102453142171726e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14127331972122192, "signal/frontier_coverage_1/group_std_mean": 0.18485023081302643, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002528792293742299, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002528792293742299, "signal/frontier_coverage_10/centered_abs_mean": 0.14127331972122192, "signal/frontier_coverage_10/group_std_mean": 0.18485023081302643, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002528792293742299, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002528792293742299, "signal/frontier_coverage_15/centered_abs_mean": 0.09733048528432846, "signal/frontier_coverage_15/group_std_mean": 0.1279260739684105, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001742215733975172, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001742215733975172, "signal/frontier_coverage_20/centered_abs_mean": 0.06843101680278778, "signal/frontier_coverage_20/group_std_mean": 0.08805256187915803, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012249151477590203, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012249151477590203, "signal/frontier_coverage_25/centered_abs_mean": 0.07552328407764435, "signal/frontier_coverage_25/group_std_mean": 0.09613100737333298, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013518667314201594, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013518667314201594, "signal/frontier_coverage_5/centered_abs_mean": 0.14127331972122192, "signal/frontier_coverage_5/group_std_mean": 0.18485023081302643, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002528792293742299, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002528792293742299, "signal/frontier_ece_reward/centered_abs_mean": 0.003918514354154468, "signal/frontier_ece_reward/group_std_mean": 0.005041631869971752, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004898142942693084, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004898142942693084, "step": 220 }, { "calibration/aurc": 0.17004395086361163, "calibration/batch_distribution_entropy": 0.8424665156534161, "calibration/buffer_distribution_entropy": 0.9242743292015563, "calibration/confidence_entropy": 0.35076926772260686, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.369921875, "calibration/coverage@15%": 0.55390625, "calibration/coverage@20%": 0.665234375, "calibration/coverage@25%": 0.746875, "calibration/coverage@30%": 0.82890625, "calibration/coverage@5%": 0.2203125, "calibration/ece": 0.09630225556649823, "calibration/mean_confidence": 0.6019314501975569, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 416.2, "completions/max_terminated_length": 416.2, "completions/mean_length": 174.01953125, "completions/mean_terminated_length": 174.01953125, "completions/min_length": 84.6, "completions/min_terminated_length": 84.6, "epoch": 0.72, "grad_norm": 0.0010777495335787535, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 753238669.0, "reward": 1.045905351638794, "reward_std": 0.07003419697284699, "rewards/accuracy_reward": 0.61875, "rewards/brier_reward": 0.8430420279502868, "rewards/confidence_uniqueness_reward": 0.9419174194335938, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.00205157226882875, "rewards/frontier_coverage_1": 0.13308198153972625, "rewards/frontier_coverage_10": 0.13073740005493165, "rewards/frontier_coverage_15": 0.09758596122264862, "rewards/frontier_coverage_20": 0.08409450352191924, "rewards/frontier_coverage_25": 0.14153032451868058, "rewards/frontier_coverage_5": 0.13308198153972625, "rewards/frontier_ece_reward": 0.004457022994756699, "signal/accuracy_reward/centered_abs_mean": 0.090283203125, "signal/accuracy_reward/group_std_mean": 0.12350248396396638, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0451416015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0451416015625, "signal/advantage_abs_mean": 0.05188203603029251, "signal/advantage_pre_scale_abs_mean": 0.05188203603029251, "signal/advantage_pre_scale_std": 0.10271037220954896, "signal/advantage_std": 0.10271037220954896, "signal/brier_reward/centered_abs_mean": 0.11129094362258911, "signal/brier_reward/group_std_mean": 0.1453452318906784, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013911367952823639, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013911367952823639, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02697415351867676, "signal/confidence_uniqueness_reward/group_std_mean": 0.0341521717607975, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003371769189834595, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003371769189834595, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022045062622055413, "signal/frontier_aurc_reward/group_std_mean": 0.0037422746885567904, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.9460661719203925e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.9460661719203925e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1369688868522644, "signal/frontier_coverage_1/group_std_mean": 0.18146575391292571, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024517430458217858, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024517430458217858, "signal/frontier_coverage_10/centered_abs_mean": 0.1342229038476944, "signal/frontier_coverage_10/group_std_mean": 0.17787247598171235, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024025900289416312, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024025900289416312, "signal/frontier_coverage_15/centered_abs_mean": 0.08977452963590622, "signal/frontier_coverage_15/group_std_mean": 0.11959208399057389, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001606964087113738, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001606964087113738, "signal/frontier_coverage_20/centered_abs_mean": 0.06367998197674751, "signal/frontier_coverage_20/group_std_mean": 0.08262477666139603, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011398716131225228, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011398716131225228, "signal/frontier_coverage_25/centered_abs_mean": 0.07500105649232865, "signal/frontier_coverage_25/group_std_mean": 0.09586530327796935, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013425188139081002, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013425188139081002, "signal/frontier_coverage_5/centered_abs_mean": 0.1369688868522644, "signal/frontier_coverage_5/group_std_mean": 0.18146575391292571, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024517430458217858, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024517430458217858, "signal/frontier_ece_reward/centered_abs_mean": 0.0035899627022445203, "signal/frontier_ece_reward/group_std_mean": 0.004766473919153214, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00044874533778056503, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00044874533778056503, "step": 225 }, { "calibration/aurc": 0.15345096336525904, "calibration/batch_distribution_entropy": 0.8268276483686279, "calibration/buffer_distribution_entropy": 0.9194886851924824, "calibration/confidence_entropy": 0.3558457240962842, "calibration/coverage@0%": 0.159765625, "calibration/coverage@1%": 0.20546875, "calibration/coverage@10%": 0.430859375, "calibration/coverage@15%": 0.4765625, "calibration/coverage@20%": 0.64453125, "calibration/coverage@25%": 0.752734375, "calibration/coverage@30%": 0.855859375, "calibration/coverage@5%": 0.36015625, "calibration/ece": 0.11590049566667984, "calibration/mean_confidence": 0.6436574303181949, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 435.0, "completions/max_terminated_length": 435.0, "completions/mean_length": 170.905078125, "completions/mean_terminated_length": 170.905078125, "completions/min_length": 81.4, "completions/min_terminated_length": 81.4, "epoch": 0.736, "grad_norm": 0.0008474554633721709, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 769928321.0, "reward": 1.0496367454528808, "reward_std": 0.06413321122527123, "rewards/accuracy_reward": 0.62734375, "rewards/brier_reward": 0.8414469003677368, "rewards/confidence_uniqueness_reward": 0.9428779602050781, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0020532570313662292, "rewards/frontier_coverage_1": 0.12552973330020906, "rewards/frontier_coverage_10": 0.12272518426179886, "rewards/frontier_coverage_15": 0.09166048467159271, "rewards/frontier_coverage_20": 0.08297713249921798, "rewards/frontier_coverage_25": 0.14741043150424957, "rewards/frontier_coverage_5": 0.12552973330020906, "rewards/frontier_ece_reward": 0.004045005375519395, "signal/accuracy_reward/centered_abs_mean": 0.07962646484375, "signal/accuracy_reward/group_std_mean": 0.10646625757217407, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.039813232421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.039813232421875, "signal/advantage_abs_mean": 0.04857211783528328, "signal/advantage_pre_scale_abs_mean": 0.04857211783528328, "signal/advantage_pre_scale_std": 0.09665304124355316, "signal/advantage_std": 0.09665304124355316, "signal/brier_reward/centered_abs_mean": 0.10986697971820832, "signal/brier_reward/group_std_mean": 0.14322306513786315, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01373337246477604, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01373337246477604, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027514719963073732, "signal/confidence_uniqueness_reward/group_std_mean": 0.034696760773658755, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034393399953842165, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034393399953842165, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002127653080970049, "signal/frontier_aurc_reward/group_std_mean": 0.0035277999471873046, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.808498804573901e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.808498804573901e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13504576981067656, "signal/frontier_coverage_1/group_std_mean": 0.17763448357582093, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024173191748559477, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024173191748559477, "signal/frontier_coverage_10/centered_abs_mean": 0.12984325736761093, "signal/frontier_coverage_10/group_std_mean": 0.17088421881198884, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002324194274842739, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002324194274842739, "signal/frontier_coverage_15/centered_abs_mean": 0.0858396053314209, "signal/frontier_coverage_15/group_std_mean": 0.11354650110006333, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015365288127213717, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015365288127213717, "signal/frontier_coverage_20/centered_abs_mean": 0.06296655610203743, "signal/frontier_coverage_20/group_std_mean": 0.08114814162254333, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011271013412624598, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011271013412624598, "signal/frontier_coverage_25/centered_abs_mean": 0.07672480046749115, "signal/frontier_coverage_25/group_std_mean": 0.09848825186491013, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001373373856768012, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001373373856768012, "signal/frontier_coverage_5/centered_abs_mean": 0.13504576981067656, "signal/frontier_coverage_5/group_std_mean": 0.17763448357582093, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024173191748559477, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024173191748559477, "signal/frontier_ece_reward/centered_abs_mean": 0.0035334643442183735, "signal/frontier_ece_reward/group_std_mean": 0.00461051557213068, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004416830430272967, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004416830430272967, "step": 230 }, { "calibration/aurc": 0.23552448749974886, "calibration/batch_distribution_entropy": 0.8891411453699248, "calibration/buffer_distribution_entropy": 0.9164292924358632, "calibration/confidence_entropy": 0.37883301086559096, "calibration/coverage@0%": 0.005859375, "calibration/coverage@1%": 0.005859375, "calibration/coverage@10%": 0.271484375, "calibration/coverage@15%": 0.36484375, "calibration/coverage@20%": 0.439453125, "calibration/coverage@25%": 0.56953125, "calibration/coverage@30%": 0.712890625, "calibration/coverage@5%": 0.1296875, "calibration/ece": 0.13301972715254065, "calibration/mean_confidence": 0.5455050746940232, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 494.4, "completions/max_terminated_length": 494.4, "completions/mean_length": 169.4642578125, "completions/mean_terminated_length": 169.4642578125, "completions/min_length": 84.8, "completions/min_terminated_length": 84.8, "epoch": 0.752, "grad_norm": 0.0008214873378165066, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 786890835.0, "reward": 1.0381749868392944, "reward_std": 0.06949230208992958, "rewards/accuracy_reward": 0.60712890625, "rewards/brier_reward": 0.8299033284187317, "rewards/confidence_uniqueness_reward": 0.9462954044342041, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002404158003628254, "rewards/frontier_coverage_1": 0.12939749360084535, "rewards/frontier_coverage_10": 0.12573150247335435, "rewards/frontier_coverage_15": 0.08957693502306938, "rewards/frontier_coverage_20": 0.07676424533128738, "rewards/frontier_coverage_25": 0.1295778825879097, "rewards/frontier_coverage_5": 0.12939749360084535, "rewards/frontier_ece_reward": 0.003980603208765388, "signal/accuracy_reward/centered_abs_mean": 0.083404541015625, "signal/accuracy_reward/group_std_mean": 0.1106999933719635, "signal/accuracy_reward/group_zero_std_frac": 0.684375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0417022705078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0417022705078125, "signal/advantage_abs_mean": 0.052297231554985044, "signal/advantage_pre_scale_abs_mean": 0.052297231554985044, "signal/advantage_pre_scale_std": 0.10435400754213334, "signal/advantage_std": 0.10435400754213334, "signal/brier_reward/centered_abs_mean": 0.10560693740844726, "signal/brier_reward/group_std_mean": 0.14195962697267533, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013200867176055908, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013200867176055908, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024635595083236695, "signal/confidence_uniqueness_reward/group_std_mean": 0.03171119168400764, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003079449385404587, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003079449385404587, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022906261961907147, "signal/frontier_aurc_reward/group_std_mean": 0.003826328832656145, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.100220685359091e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.100220685359091e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1227890282869339, "signal/frontier_coverage_1/group_std_mean": 0.16787476241588592, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021979236509650944, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021979236509650944, "signal/frontier_coverage_10/centered_abs_mean": 0.11806153655052185, "signal/frontier_coverage_10/group_std_mean": 0.1615957111120224, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021133014233782887, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021133014233782887, "signal/frontier_coverage_15/centered_abs_mean": 0.07612589448690414, "signal/frontier_coverage_15/group_std_mean": 0.10439666956663132, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013626534724608063, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013626534724608063, "signal/frontier_coverage_20/centered_abs_mean": 0.05772598385810852, "signal/frontier_coverage_20/group_std_mean": 0.07624504715204239, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010332950623705983, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010332950623705983, "signal/frontier_coverage_25/centered_abs_mean": 0.0768646091222763, "signal/frontier_coverage_25/group_std_mean": 0.09990313202142716, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013758764602243901, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013758764602243901, "signal/frontier_coverage_5/centered_abs_mean": 0.1227890282869339, "signal/frontier_coverage_5/group_std_mean": 0.16787476241588592, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021979236509650944, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021979236509650944, "signal/frontier_ece_reward/centered_abs_mean": 0.003331187181174755, "signal/frontier_ece_reward/group_std_mean": 0.00445093372836709, "signal/frontier_ece_reward/group_zero_std_frac": 0.00625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004163983976468444, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004163983976468444, "step": 235 }, { "calibration/aurc": 0.19536266164648816, "calibration/batch_distribution_entropy": 0.920489897414648, "calibration/buffer_distribution_entropy": 0.9151407350609281, "calibration/confidence_entropy": 0.40926557599582036, "calibration/coverage@0%": 0.073046875, "calibration/coverage@1%": 0.09140625, "calibration/coverage@10%": 0.42265625, "calibration/coverage@15%": 0.50078125, "calibration/coverage@20%": 0.593359375, "calibration/coverage@25%": 0.65390625, "calibration/coverage@30%": 0.7390625, "calibration/coverage@5%": 0.28359375, "calibration/ece": 0.14984672133896465, "calibration/mean_confidence": 0.5657741534754462, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 430.6, "completions/max_terminated_length": 430.6, "completions/mean_length": 174.3123046875, "completions/mean_terminated_length": 174.3123046875, "completions/min_length": 82.2, "completions/min_terminated_length": 82.2, "epoch": 0.768, "grad_norm": 0.0011468707816675305, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 803608497.0, "reward": 1.0229015827178956, "reward_std": 0.06426062434911728, "rewards/accuracy_reward": 0.56943359375, "rewards/brier_reward": 0.8373586058616638, "rewards/confidence_uniqueness_reward": 0.9492919921875, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0022013495909050107, "rewards/frontier_coverage_1": 0.16131471395492553, "rewards/frontier_coverage_10": 0.1579432725906372, "rewards/frontier_coverage_15": 0.10936646610498428, "rewards/frontier_coverage_20": 0.08728825151920319, "rewards/frontier_coverage_25": 0.12453770935535431, "rewards/frontier_coverage_5": 0.16131471395492553, "rewards/frontier_ece_reward": 0.004330319678410887, "signal/accuracy_reward/centered_abs_mean": 0.071881103515625, "signal/accuracy_reward/group_std_mean": 0.09971266686916351, "signal/accuracy_reward/group_zero_std_frac": 0.696875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0359405517578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0359405517578125, "signal/advantage_abs_mean": 0.04755199551582336, "signal/advantage_pre_scale_abs_mean": 0.04755199551582336, "signal/advantage_pre_scale_std": 0.09524376839399337, "signal/advantage_std": 0.09524376839399337, "signal/brier_reward/centered_abs_mean": 0.10900415778160095, "signal/brier_reward/group_std_mean": 0.14201750457286835, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01362551972270012, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01362551972270012, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021919608116149902, "signal/confidence_uniqueness_reward/group_std_mean": 0.02759426794946194, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002739951014518738, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002739951014518738, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.00208567357622087, "signal/frontier_aurc_reward/group_std_mean": 0.0037588839419186114, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.733355588337872e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.733355588337872e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1343323200941086, "signal/frontier_coverage_1/group_std_mean": 0.17623608708381652, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002404548367485404, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002404548367485404, "signal/frontier_coverage_10/centered_abs_mean": 0.12876609861850738, "signal/frontier_coverage_10/group_std_mean": 0.16904014348983765, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023049130104482174, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023049130104482174, "signal/frontier_coverage_15/centered_abs_mean": 0.08204463869333267, "signal/frontier_coverage_15/group_std_mean": 0.10765648931264878, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014685989357531072, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014685989357531072, "signal/frontier_coverage_20/centered_abs_mean": 0.06093166768550873, "signal/frontier_coverage_20/group_std_mean": 0.07744322419166565, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001090676779858768, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001090676779858768, "signal/frontier_coverage_25/centered_abs_mean": 0.07726499885320663, "signal/frontier_coverage_25/group_std_mean": 0.0991871863603592, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013830434065312148, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013830434065312148, "signal/frontier_coverage_5/centered_abs_mean": 0.1343323200941086, "signal/frontier_coverage_5/group_std_mean": 0.17623608708381652, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002404548367485404, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002404548367485404, "signal/frontier_ece_reward/centered_abs_mean": 0.003368105459958315, "signal/frontier_ece_reward/group_std_mean": 0.0044054843485355375, "signal/frontier_ece_reward/group_zero_std_frac": 0.015625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004210131824947894, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004210131824947894, "step": 240 }, { "calibration/aurc": 0.25572485838844444, "calibration/batch_distribution_entropy": 0.8792167443629764, "calibration/buffer_distribution_entropy": 0.9126122665055396, "calibration/confidence_entropy": 0.3721657687613595, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.14140625, "calibration/coverage@15%": 0.30270521242632614, "calibration/coverage@20%": 0.38070205058939094, "calibration/coverage@25%": 0.5285839268172887, "calibration/coverage@30%": 0.6657830918467583, "calibration/coverage@5%": 0.078515625, "calibration/ece": 0.14647233710394375, "calibration/mean_confidence": 0.5808501109260684, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 768.2, "completions/max_terminated_length": 550.6, "completions/mean_length": 174.3005859375, "completions/mean_terminated_length": 174.03550720214844, "completions/min_length": 82.4, "completions/min_terminated_length": 82.4, "epoch": 0.784, "grad_norm": 0.000932548544369638, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 820567703.0, "reward": 1.042500340938568, "reward_std": 0.07330340743064881, "rewards/accuracy_reward": 0.62099609375, "rewards/brier_reward": 0.819736099243164, "rewards/confidence_uniqueness_reward": 0.9482918739318847, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0020144137553870676, "rewards/frontier_coverage_1": 0.10770976990461349, "rewards/frontier_coverage_10": 0.10597532391548156, "rewards/frontier_coverage_15": 0.07634644880890847, "rewards/frontier_coverage_20": 0.071499665081501, "rewards/frontier_coverage_25": 0.13427656888961792, "rewards/frontier_coverage_5": 0.10770976990461349, "rewards/frontier_ece_reward": 0.003026763442903757, "signal/accuracy_reward/centered_abs_mean": 0.091192626953125, "signal/accuracy_reward/group_std_mean": 0.12209666967391967, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455963134765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0455963134765625, "signal/advantage_abs_mean": 0.05459719970822334, "signal/advantage_pre_scale_abs_mean": 0.05459719970822334, "signal/advantage_pre_scale_std": 0.10620496869087219, "signal/advantage_std": 0.10620496869087219, "signal/brier_reward/centered_abs_mean": 0.11177153140306473, "signal/brier_reward/group_std_mean": 0.14610156714916228, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013971441425383091, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013971441425383091, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02218124717473984, "signal/confidence_uniqueness_reward/group_std_mean": 0.02863222174346447, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00277265589684248, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00277265589684248, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020621836418285968, "signal/frontier_aurc_reward/group_std_mean": 0.003778617037460208, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.691308629640844e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.691308629640844e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13576821088790894, "signal/frontier_coverage_1/group_std_mean": 0.1774687796831131, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002430250868201256, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002430250868201256, "signal/frontier_coverage_10/centered_abs_mean": 0.12766512483358383, "signal/frontier_coverage_10/group_std_mean": 0.16704229712486268, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002285205526277423, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002285205526277423, "signal/frontier_coverage_15/centered_abs_mean": 0.080125692486763, "signal/frontier_coverage_15/group_std_mean": 0.10491674393415451, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001434249896556139, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001434249896556139, "signal/frontier_coverage_20/centered_abs_mean": 0.06089780628681183, "signal/frontier_coverage_20/group_std_mean": 0.07763027101755142, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010900706751272083, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010900706751272083, "signal/frontier_coverage_25/centered_abs_mean": 0.0802333727478981, "signal/frontier_coverage_25/group_std_mean": 0.10428185015916824, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014361773384734989, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014361773384734989, "signal/frontier_coverage_5/centered_abs_mean": 0.13576821088790894, "signal/frontier_coverage_5/group_std_mean": 0.1774687796831131, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002430250868201256, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002430250868201256, "signal/frontier_ece_reward/centered_abs_mean": 0.003358669299632311, "signal/frontier_ece_reward/group_std_mean": 0.004410902410745621, "signal/frontier_ece_reward/group_zero_std_frac": 0.0125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004198336624540389, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004198336624540389, "step": 245 }, { "calibration/aurc": 0.1811316225816756, "calibration/batch_distribution_entropy": 0.8751262833350175, "calibration/buffer_distribution_entropy": 0.910228495124134, "calibration/confidence_entropy": 0.3729012404104689, "calibration/coverage@0%": 0.034375, "calibration/coverage@1%": 0.034375, "calibration/coverage@10%": 0.305859375, "calibration/coverage@15%": 0.504296875, "calibration/coverage@20%": 0.6296875, "calibration/coverage@25%": 0.739453125, "calibration/coverage@30%": 0.812109375, "calibration/coverage@5%": 0.15546875, "calibration/ece": 0.10429695437782113, "calibration/mean_confidence": 0.5453522137018766, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.0, "completions/max_terminated_length": 425.0, "completions/mean_length": 175.34814453125, "completions/mean_terminated_length": 175.34814453125, "completions/min_length": 84.8, "completions/min_terminated_length": 84.8, "epoch": 0.8, "grad_norm": 0.0009130456601269543, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 837373828.0, "reward": 1.0609445571899414, "reward_std": 0.06633923426270485, "rewards/accuracy_reward": 0.64677734375, "rewards/brier_reward": 0.8513461947441101, "rewards/confidence_uniqueness_reward": 0.9482261657714843, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0015828640898689628, "rewards/frontier_coverage_1": 0.11865575462579728, "rewards/frontier_coverage_10": 0.1139179825782776, "rewards/frontier_coverage_15": 0.08259946554899215, "rewards/frontier_coverage_20": 0.08261324763298035, "rewards/frontier_coverage_25": 0.16533060371875763, "rewards/frontier_coverage_5": 0.11865575462579728, "rewards/frontier_ece_reward": 0.0034714728593826295, "signal/accuracy_reward/centered_abs_mean": 0.088458251953125, "signal/accuracy_reward/group_std_mean": 0.11326353400945663, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442291259765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0442291259765625, "signal/advantage_abs_mean": 0.051153923571109775, "signal/advantage_pre_scale_abs_mean": 0.051153923571109775, "signal/advantage_pre_scale_std": 0.10137955248355865, "signal/advantage_std": 0.10137955248355865, "signal/brier_reward/centered_abs_mean": 0.10467512607574463, "signal/brier_reward/group_std_mean": 0.13507361710071564, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013084390759468078, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013084390759468078, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02194211483001709, "signal/confidence_uniqueness_reward/group_std_mean": 0.028182218968868255, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002742764353752136, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002742764353752136, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.001707544713281095, "signal/frontier_aurc_reward/group_std_mean": 0.002929617092013359, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.056504938285798e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.056504938285798e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13313665091991425, "signal/frontier_coverage_1/group_std_mean": 0.17197324931621552, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002383145969361067, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002383145969361067, "signal/frontier_coverage_10/centered_abs_mean": 0.12428333461284638, "signal/frontier_coverage_10/group_std_mean": 0.16083629578351974, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002224671561270952, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002224671561270952, "signal/frontier_coverage_15/centered_abs_mean": 0.07507807612419129, "signal/frontier_coverage_15/group_std_mean": 0.09738193154335022, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013438975671306252, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013438975671306252, "signal/frontier_coverage_20/centered_abs_mean": 0.058079701662063596, "signal/frontier_coverage_20/group_std_mean": 0.07403742522001266, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001039626623969525, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001039626623969525, "signal/frontier_coverage_25/centered_abs_mean": 0.0810657873749733, "signal/frontier_coverage_25/group_std_mean": 0.10496636033058167, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001451077568344772, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001451077568344772, "signal/frontier_coverage_5/centered_abs_mean": 0.13313665091991425, "signal/frontier_coverage_5/group_std_mean": 0.17197324931621552, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002383145969361067, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002383145969361067, "signal/frontier_ece_reward/centered_abs_mean": 0.003261947957798839, "signal/frontier_ece_reward/group_std_mean": 0.0042387610767036675, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00040774349472485485, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00040774349472485485, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.5180207117377988, "eval_calibration/batch_distribution_entropy": 0.8033441147208936, "eval_calibration/buffer_distribution_entropy": 0.9092439339504332, "eval_calibration/confidence_entropy": 0.34970418230705264, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.03125, "eval_calibration/coverage@15%": 0.03125, "eval_calibration/coverage@20%": 0.046875, "eval_calibration/coverage@25%": 0.046875, "eval_calibration/coverage@30%": 0.078125, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.2625827961395344, "eval_calibration/mean_confidence": 0.5134601332432174, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 325.0, "eval_completions/max_terminated_length": 325.0, "eval_completions/mean_length": 181.17066192626953, "eval_completions/mean_terminated_length": 181.17066192626953, "eval_completions/min_length": 103.25, "eval_completions/min_terminated_length": 103.25, "eval_loss": 0.0, "eval_num_tokens": 837373828.0, "eval_reward": 0.9405190795660019, "eval_reward_std": 0.24622543156147003, "eval_rewards/accuracy_reward": 0.4296875, "eval_rewards/brier_reward": 0.7792998254299164, "eval_rewards/confidence_uniqueness_reward": 0.89208984375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.005082366755232215, "eval_rewards/frontier_coverage_1": 0.2168629802763462, "eval_rewards/frontier_coverage_10": 0.20569873228669167, "eval_rewards/frontier_coverage_15": 0.1242841575294733, "eval_rewards/frontier_coverage_20": 0.0813782811164856, "eval_rewards/frontier_coverage_25": 0.06741005275398493, "eval_rewards/frontier_coverage_5": 0.2168629802763462, "eval_rewards/frontier_ece_reward": 0.0040713000344112515, "eval_runtime": 17.9812, "eval_samples_per_second": 27.807, "eval_signal/accuracy_reward/centered_abs_mean": 0.47216796875, "eval_signal/accuracy_reward/group_std_mean": 0.4931754469871521, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.236083984375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.236083984375, "eval_signal/advantage_abs_mean": 0.22818677872419357, "eval_signal/advantage_pre_scale_abs_mean": 0.22818677872419357, "eval_signal/advantage_pre_scale_std": 0.24369388818740845, "eval_signal/advantage_std": 0.24369388818740845, "eval_signal/brier_reward/centered_abs_mean": 0.24806179851293564, "eval_signal/brier_reward/group_std_mean": 0.2987174764275551, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031007724814116955, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.031007724814116955, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04681396484375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05593178328126669, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00585174560546875, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00585174560546875, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.007204441004432738, "eval_signal/frontier_aurc_reward/group_std_mean": 0.015742348041385412, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00012895949657831807, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00012895949657831807, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3721674680709839, "eval_signal/frontier_coverage_1/group_std_mean": 0.456407867372036, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0066617976408451796, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0066617976408451796, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.35168465226888657, "eval_signal/frontier_coverage_10/group_std_mean": 0.4315572455525398, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0062951549189165235, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0062951549189165235, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2018623724579811, "eval_signal/frontier_coverage_15/group_std_mean": 0.2502391189336777, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003613336244598031, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003613336244598031, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.12181077525019646, "eval_signal/frontier_coverage_20/group_std_mean": 0.14741826057434082, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021804128773510456, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021804128773510456, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.18845771625638008, "eval_signal/frontier_coverage_25/group_std_mean": 0.25003478676080704, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003373392974026501, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003373392974026501, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3721674680709839, "eval_signal/frontier_coverage_5/group_std_mean": 0.456407867372036, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0066617976408451796, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0066617976408451796, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.00661488005425781, "eval_signal/frontier_ece_reward/group_std_mean": 0.008529237005859613, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008268600067822263, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008268600067822263, "eval_steps_per_second": 0.222, "step": 250 }, { "epoch": 0.8, "step": 250, "train_probe_calibration/aurc": 0.17399068903789838, "train_probe_calibration/batch_distribution_entropy": 0.7855592365623258, "train_probe_calibration/buffer_distribution_entropy": 0.9093291270316723, "train_probe_calibration/confidence_entropy": 0.35752119805263033, "train_probe_calibration/coverage@0%": 0.265625, "train_probe_calibration/coverage@1%": 0.265625, "train_probe_calibration/coverage@10%": 0.5390625, "train_probe_calibration/coverage@15%": 0.578125, "train_probe_calibration/coverage@20%": 0.6796875, "train_probe_calibration/coverage@25%": 0.7734375, "train_probe_calibration/coverage@30%": 0.875, "train_probe_calibration/coverage@5%": 0.265625, "train_probe_calibration/ece": 0.2185999273752981, "train_probe_calibration/mean_confidence": 0.5888696124059519, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 301.0, "train_probe_completions/max_terminated_length": 301.0, "train_probe_completions/mean_length": 176.68392944335938, "train_probe_completions/mean_terminated_length": 176.68392944335938, "train_probe_completions/min_length": 100.25, "train_probe_completions/min_terminated_length": 100.25, "train_probe_loss": 0.0, "train_probe_num_tokens": 837373828.0, "train_probe_reward": 1.0589460730552673, "train_probe_reward_std": 0.2253180705010891, "train_probe_rewards/accuracy_reward": 0.654296875, "train_probe_rewards/brier_reward": 0.8551206290721893, "train_probe_rewards/confidence_uniqueness_reward": 0.8935546875, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_aurc_reward": -0.0010356987913837656, "train_probe_rewards/frontier_coverage_1": 0.12353460118174553, "train_probe_rewards/frontier_coverage_10": 0.11712087318301201, "train_probe_rewards/frontier_coverage_15": 0.08624438382685184, "train_probe_rewards/frontier_coverage_20": 0.0883408710360527, "train_probe_rewards/frontier_coverage_25": 0.1752123422920704, "train_probe_rewards/frontier_coverage_5": 0.12353460118174553, "train_probe_rewards/frontier_ece_reward": 0.0036108798813074827, "train_probe_runtime": 16.9813, "train_probe_samples_per_second": 29.444, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4420166015625, "train_probe_signal/accuracy_reward/group_std_mean": 0.47711893171072006, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22100830078125, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22100830078125, "train_probe_signal/advantage_abs_mean": 0.20390921458601952, "train_probe_signal/advantage_pre_scale_abs_mean": 0.20390921458601952, "train_probe_signal/advantage_pre_scale_std": 0.22289463132619858, "train_probe_signal/advantage_std": 0.22289463132619858, "train_probe_signal/brier_reward/centered_abs_mean": 0.17955372482538223, "train_probe_signal/brier_reward/group_std_mean": 0.24644171074032784, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02244421560317278, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.02244421560317278, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0458221435546875, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.054768980480730534, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0057277679443359375, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0057277679443359375, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.0018088824581354856, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.003385799122042954, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.237899409214151e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.237899409214151e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.34778689593076706, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.4604829102754593, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006225385353900492, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006225385353900492, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.32435665279626846, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.431157648563385, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005805984023027122, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005805984023027122, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.18024399504065514, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.24868060275912285, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032263672328554094, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032263672328554094, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.10621210373938084, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.13919900357723236, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019011966069228947, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019011966069228947, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.18446215242147446, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.2188771776854992, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003301872464362532, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003301872464362532, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.34778689593076706, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.4604829102754593, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006225385353900492, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006225385353900492, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.005974971689283848, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.008375309873372316, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000746871461160481, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000746871461160481, "train_probe_steps_per_second": 0.236 }, { "calibration/aurc": 0.18597314571416296, "calibration/batch_distribution_entropy": 0.8201437592495328, "calibration/buffer_distribution_entropy": 0.9071677287932329, "calibration/confidence_entropy": 0.3449699186391227, "calibration/coverage@0%": 0.006640625, "calibration/coverage@1%": 0.006640625, "calibration/coverage@10%": 0.2734375, "calibration/coverage@15%": 0.45234375, "calibration/coverage@20%": 0.64765625, "calibration/coverage@25%": 0.739453125, "calibration/coverage@30%": 0.872265625, "calibration/coverage@5%": 0.059375, "calibration/ece": 0.13387027546349461, "calibration/mean_confidence": 0.6017411624426133, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 646.4, "completions/max_terminated_length": 421.2, "completions/mean_length": 175.79228515625, "completions/mean_terminated_length": 175.66018371582032, "completions/min_length": 87.8, "completions/min_terminated_length": 87.8, "epoch": 0.816, "grad_norm": 0.0009323036065325141, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 854273109.0, "reward": 1.0501249313354493, "reward_std": 0.06620060950517655, "rewards/accuracy_reward": 0.64033203125, "rewards/brier_reward": 0.8141647100448608, "rewards/confidence_uniqueness_reward": 0.9440381526947021, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0026771835517138244, "rewards/frontier_coverage_1": 0.08918848186731339, "rewards/frontier_coverage_10": 0.08616004511713982, "rewards/frontier_coverage_15": 0.06467956006526947, "rewards/frontier_coverage_20": 0.07265233993530273, "rewards/frontier_coverage_25": 0.15391016006469727, "rewards/frontier_coverage_5": 0.08918848186731339, "rewards/frontier_ece_reward": 0.0026550061535090207, "signal/accuracy_reward/centered_abs_mean": 0.078704833984375, "signal/accuracy_reward/group_std_mean": 0.10836423933506012, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0393524169921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0393524169921875, "signal/advantage_abs_mean": 0.049061907827854155, "signal/advantage_pre_scale_abs_mean": 0.049061907827854155, "signal/advantage_pre_scale_std": 0.09836698472499847, "signal/advantage_std": 0.09836698472499847, "signal/brier_reward/centered_abs_mean": 0.1172541081905365, "signal/brier_reward/group_std_mean": 0.1511300802230835, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014656763523817062, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014656763523817062, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024897144734859468, "signal/confidence_uniqueness_reward/group_std_mean": 0.032081881910562514, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031121430918574335, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031121430918574335, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027089090086519717, "signal/frontier_aurc_reward/group_std_mean": 0.0045408796519041065, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.848946919082664e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.848946919082664e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13663374185562133, "signal/frontier_coverage_1/group_std_mean": 0.1800040602684021, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002445743978023529, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002445743978023529, "signal/frontier_coverage_10/centered_abs_mean": 0.1276185154914856, "signal/frontier_coverage_10/group_std_mean": 0.16832120418548585, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022843712475150825, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022843712475150825, "signal/frontier_coverage_15/centered_abs_mean": 0.07773556411266327, "signal/frontier_coverage_15/group_std_mean": 0.10220663100481034, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013914665207266808, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013914665207266808, "signal/frontier_coverage_20/centered_abs_mean": 0.06176744028925896, "signal/frontier_coverage_20/group_std_mean": 0.07860565781593323, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011056371731683612, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011056371731683612, "signal/frontier_coverage_25/centered_abs_mean": 0.08826594352722168, "signal/frontier_coverage_25/group_std_mean": 0.11294655352830887, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015799603424966335, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015799603424966335, "signal/frontier_coverage_5/centered_abs_mean": 0.13663374185562133, "signal/frontier_coverage_5/group_std_mean": 0.1800040602684021, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002445743978023529, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002445743978023529, "signal/frontier_ece_reward/centered_abs_mean": 0.003223916422575712, "signal/frontier_ece_reward/group_std_mean": 0.004277074383571744, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000402989552821964, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000402989552821964, "step": 255 }, { "calibration/aurc": 0.23470730705748308, "calibration/batch_distribution_entropy": 0.8653603005944364, "calibration/buffer_distribution_entropy": 0.9029355697295275, "calibration/confidence_entropy": 0.360791347705753, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0484375, "calibration/coverage@10%": 0.22890625, "calibration/coverage@15%": 0.322265625, "calibration/coverage@20%": 0.48125, "calibration/coverage@25%": 0.6, "calibration/coverage@30%": 0.690234375, "calibration/coverage@5%": 0.18515625, "calibration/ece": 0.11370581296588214, "calibration/mean_confidence": 0.5632247972443706, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 632.6, "completions/max_terminated_length": 421.0, "completions/mean_length": 179.2326171875, "completions/mean_terminated_length": 179.10030822753907, "completions/min_length": 87.8, "completions/min_terminated_length": 87.8, "epoch": 0.832, "grad_norm": 0.0008252764237113297, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 871116803.0, "reward": 1.041144061088562, "reward_std": 0.06580123379826545, "rewards/accuracy_reward": 0.60849609375, "rewards/brier_reward": 0.8426036357879638, "rewards/confidence_uniqueness_reward": 0.9396020889282226, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0020183057175017895, "rewards/frontier_coverage_1": 0.14359851330518722, "rewards/frontier_coverage_10": 0.13196637630462646, "rewards/frontier_coverage_15": 0.09356682449579239, "rewards/frontier_coverage_20": 0.0918369397521019, "rewards/frontier_coverage_25": 0.1650959938764572, "rewards/frontier_coverage_5": 0.14359851330518722, "rewards/frontier_ece_reward": 0.0038169843144714834, "signal/accuracy_reward/centered_abs_mean": 0.080181884765625, "signal/accuracy_reward/group_std_mean": 0.11224258989095688, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0400909423828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0400909423828125, "signal/advantage_abs_mean": 0.04792519509792328, "signal/advantage_pre_scale_abs_mean": 0.04792519509792328, "signal/advantage_pre_scale_std": 0.09914593994617463, "signal/advantage_std": 0.09914593994617463, "signal/brier_reward/centered_abs_mean": 0.10063754320144654, "signal/brier_reward/group_std_mean": 0.13023419976234435, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012579692900180817, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012579692900180817, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02736304737627506, "signal/confidence_uniqueness_reward/group_std_mean": 0.035405050963163376, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0034203809220343826, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034203809220343826, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018809714587405325, "signal/frontier_aurc_reward/group_std_mean": 0.0032024606596678497, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.3669386903056874e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.3669386903056874e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12828720062971116, "signal/frontier_coverage_1/group_std_mean": 0.167554047703743, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022963409312069414, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022963409312069414, "signal/frontier_coverage_10/centered_abs_mean": 0.11622040122747421, "signal/frontier_coverage_10/group_std_mean": 0.15186418890953063, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020803450839594006, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020803450839594006, "signal/frontier_coverage_15/centered_abs_mean": 0.07340935990214348, "signal/frontier_coverage_15/group_std_mean": 0.09487757980823516, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013140274910256266, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013140274910256266, "signal/frontier_coverage_20/centered_abs_mean": 0.056497588753700256, "signal/frontier_coverage_20/group_std_mean": 0.07151806354522705, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010113068157806993, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010113068157806993, "signal/frontier_coverage_25/centered_abs_mean": 0.07748262286186218, "signal/frontier_coverage_25/group_std_mean": 0.10127020329236984, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001386938919313252, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001386938919313252, "signal/frontier_coverage_5/centered_abs_mean": 0.12828720062971116, "signal/frontier_coverage_5/group_std_mean": 0.167554047703743, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022963409312069414, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022963409312069414, "signal/frontier_ece_reward/centered_abs_mean": 0.0030659837648272514, "signal/frontier_ece_reward/group_std_mean": 0.0040200600866228346, "signal/frontier_ece_reward/group_zero_std_frac": 0.034375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00038324797060340643, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00038324797060340643, "step": 260 }, { "calibration/aurc": 0.19863410150802505, "calibration/batch_distribution_entropy": 0.8434263072179384, "calibration/buffer_distribution_entropy": 0.9009345427966128, "calibration/confidence_entropy": 0.36509388162027195, "calibration/coverage@0%": 0.01171875, "calibration/coverage@1%": 0.01171875, "calibration/coverage@10%": 0.35859375, "calibration/coverage@15%": 0.484375, "calibration/coverage@20%": 0.5640625, "calibration/coverage@25%": 0.616796875, "calibration/coverage@30%": 0.68828125, "calibration/coverage@5%": 0.2640625, "calibration/ece": 0.09913607843357039, "calibration/mean_confidence": 0.638816903409418, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 515.8, "completions/max_terminated_length": 515.8, "completions/mean_length": 179.28642578125, "completions/mean_terminated_length": 179.28642578125, "completions/min_length": 88.6, "completions/min_terminated_length": 88.6, "epoch": 0.848, "grad_norm": 0.000993276946246624, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 887967064.0, "reward": 1.0350669145584106, "reward_std": 0.06454772800207138, "rewards/accuracy_reward": 0.59892578125, "rewards/brier_reward": 0.8351063370704651, "rewards/confidence_uniqueness_reward": 0.9449150085449218, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.00206311687361449, "rewards/frontier_coverage_1": 0.1330704927444458, "rewards/frontier_coverage_10": 0.12332247197628021, "rewards/frontier_coverage_15": 0.08595439046621323, "rewards/frontier_coverage_20": 0.08236979991197586, "rewards/frontier_coverage_25": 0.15357653945684432, "rewards/frontier_coverage_5": 0.1330704927444458, "rewards/frontier_ece_reward": 0.003629566542804241, "signal/accuracy_reward/centered_abs_mean": 0.073809814453125, "signal/accuracy_reward/group_std_mean": 0.09948968291282653, "signal/accuracy_reward/group_zero_std_frac": 0.709375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0369049072265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0369049072265625, "signal/advantage_abs_mean": 0.04851563647389412, "signal/advantage_pre_scale_abs_mean": 0.04851563647389412, "signal/advantage_pre_scale_std": 0.09739507734775543, "signal/advantage_std": 0.09739507734775543, "signal/brier_reward/centered_abs_mean": 0.10919748991727829, "signal/brier_reward/group_std_mean": 0.14081784188747407, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013649686239659786, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013649686239659786, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024629361182451247, "signal/confidence_uniqueness_reward/group_std_mean": 0.03201264031231403, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003078670147806406, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003078670147806406, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020340461749583484, "signal/frontier_aurc_reward/group_std_mean": 0.003400903893634677, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6409427048056385e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6409427048056385e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12788800597190858, "signal/frontier_coverage_1/group_std_mean": 0.16677136719226837, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002289195219054818, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002289195219054818, "signal/frontier_coverage_10/centered_abs_mean": 0.11876944452524185, "signal/frontier_coverage_10/group_std_mean": 0.1548892468214035, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021259729750454427, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021259729750454427, "signal/frontier_coverage_15/centered_abs_mean": 0.07295108437538148, "signal/frontier_coverage_15/group_std_mean": 0.09489114880561829, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013058244483545422, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013058244483545422, "signal/frontier_coverage_20/centered_abs_mean": 0.05809517651796341, "signal/frontier_coverage_20/group_std_mean": 0.07366363406181335, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010399035876616836, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010399035876616836, "signal/frontier_coverage_25/centered_abs_mean": 0.08594117909669877, "signal/frontier_coverage_25/group_std_mean": 0.1105627328157425, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001538347010500729, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001538347010500729, "signal/frontier_coverage_5/centered_abs_mean": 0.12788800597190858, "signal/frontier_coverage_5/group_std_mean": 0.16677136719226837, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002289195219054818, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002289195219054818, "signal/frontier_ece_reward/centered_abs_mean": 0.0031425395514816045, "signal/frontier_ece_reward/group_std_mean": 0.004072493128478527, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00039281744393520056, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00039281744393520056, "step": 265 }, { "calibration/aurc": 0.1838647683664046, "calibration/batch_distribution_entropy": 0.8080613044753928, "calibration/buffer_distribution_entropy": 0.899667503332226, "calibration/confidence_entropy": 0.35472012648297896, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.275390625, "calibration/coverage@15%": 0.397265625, "calibration/coverage@20%": 0.58984375, "calibration/coverage@25%": 0.82734375, "calibration/coverage@30%": 0.880078125, "calibration/coverage@5%": 0.18203125, "calibration/ece": 0.13873622892694146, "calibration/mean_confidence": 0.6925125915452537, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 476.8, "completions/max_terminated_length": 476.8, "completions/mean_length": 181.34345703125, "completions/mean_terminated_length": 181.34345703125, "completions/min_length": 87.8, "completions/min_terminated_length": 87.8, "epoch": 0.864, "grad_norm": 0.0011021445970982313, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 904810837.0, "reward": 1.054364514350891, "reward_std": 0.06596897840499878, "rewards/accuracy_reward": 0.6439453125, "rewards/brier_reward": 0.8299910545349121, "rewards/confidence_uniqueness_reward": 0.9402127981185913, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.001953143556602299, "rewards/frontier_coverage_1": 0.09931659996509552, "rewards/frontier_coverage_10": 0.09283578842878341, "rewards/frontier_coverage_15": 0.06951518058776855, "rewards/frontier_coverage_20": 0.07742422819137573, "rewards/frontier_coverage_25": 0.16652192324399948, "rewards/frontier_coverage_5": 0.09931659996509552, "rewards/frontier_ece_reward": 0.0029755703639239074, "signal/accuracy_reward/centered_abs_mean": 0.0814697265625, "signal/accuracy_reward/group_std_mean": 0.10998818576335907, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04073486328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04073486328125, "signal/advantage_abs_mean": 0.04864993765950203, "signal/advantage_pre_scale_abs_mean": 0.04864993765950203, "signal/advantage_pre_scale_std": 0.09855391681194306, "signal/advantage_std": 0.09855391681194306, "signal/brier_reward/centered_abs_mean": 0.10496192872524261, "signal/brier_reward/group_std_mean": 0.13740627765655516, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013120241090655326, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013120241090655326, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026168223470449448, "signal/confidence_uniqueness_reward/group_std_mean": 0.0334943987429142, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003271027933806181, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003271027933806181, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0018899486400187015, "signal/frontier_aurc_reward/group_std_mean": 0.0030752378050237896, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.38300786097534e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.38300786097534e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.12643099427223206, "signal/frontier_coverage_1/group_std_mean": 0.16757656931877135, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00226311469450593, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00226311469450593, "signal/frontier_coverage_10/centered_abs_mean": 0.11652288883924485, "signal/frontier_coverage_10/group_std_mean": 0.15484984815120698, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020857596304267646, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020857596304267646, "signal/frontier_coverage_15/centered_abs_mean": 0.07096642255783081, "signal/frontier_coverage_15/group_std_mean": 0.09379614144563675, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0012702989391982556, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012702989391982556, "signal/frontier_coverage_20/centered_abs_mean": 0.05811881348490715, "signal/frontier_coverage_20/group_std_mean": 0.07434172034263611, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010403267107903958, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010403267107903958, "signal/frontier_coverage_25/centered_abs_mean": 0.08379273712635041, "signal/frontier_coverage_25/group_std_mean": 0.10927441716194153, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014998900005593896, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014998900005593896, "signal/frontier_coverage_5/centered_abs_mean": 0.12643099427223206, "signal/frontier_coverage_5/group_std_mean": 0.16757656931877135, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00226311469450593, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00226311469450593, "signal/frontier_ece_reward/centered_abs_mean": 0.002925369096919894, "signal/frontier_ece_reward/group_std_mean": 0.003875131858512759, "signal/frontier_ece_reward/group_zero_std_frac": 0.01875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00036567113711498677, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00036567113711498677, "step": 270 }, { "calibration/aurc": 0.3097481712777975, "calibration/batch_distribution_entropy": 0.8705793388016131, "calibration/buffer_distribution_entropy": 0.8972102592893678, "calibration/confidence_entropy": 0.35807765865513697, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.08438340875733855, "calibration/coverage@15%": 0.18757797211350294, "calibration/coverage@20%": 0.3360514004403131, "calibration/coverage@25%": 0.4349307424168297, "calibration/coverage@30%": 0.5115177042563601, "calibration/coverage@5%": 0.0, "calibration/ece": 0.15975441040071203, "calibration/mean_confidence": 0.5834101912294416, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 537.0, "completions/max_terminated_length": 537.0, "completions/mean_length": 181.69228515625, "completions/mean_terminated_length": 181.69228515625, "completions/min_length": 88.2, "completions/min_terminated_length": 88.2, "epoch": 0.88, "grad_norm": 0.0011194439139217138, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 921818438.0, "reward": 1.0138964176177978, "reward_std": 0.06668102517724037, "rewards/accuracy_reward": 0.56083984375, "rewards/brier_reward": 0.8156968593597412, "rewards/confidence_uniqueness_reward": 0.9443087816238404, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0028428094228729606, "rewards/frontier_coverage_1": 0.14721233248710633, "rewards/frontier_coverage_10": 0.1358731895685196, "rewards/frontier_coverage_15": 0.09130201935768127, "rewards/frontier_coverage_20": 0.08002846986055374, "rewards/frontier_coverage_25": 0.13434360027313233, "rewards/frontier_coverage_5": 0.14721233248710633, "rewards/frontier_ece_reward": 0.003603707766160369, "signal/accuracy_reward/centered_abs_mean": 0.075372314453125, "signal/accuracy_reward/group_std_mean": 0.10563235729932785, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0376861572265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0376861572265625, "signal/advantage_abs_mean": 0.04881888553500176, "signal/advantage_pre_scale_abs_mean": 0.04881888553500176, "signal/advantage_pre_scale_std": 0.0986421599984169, "signal/advantage_std": 0.0986421599984169, "signal/brier_reward/centered_abs_mean": 0.10961353480815887, "signal/brier_reward/group_std_mean": 0.14331442713737488, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013701691851019859, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013701691851019859, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0234049953520298, "signal/confidence_uniqueness_reward/group_std_mean": 0.0299153421074152, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002925624419003725, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002925624419003725, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002696803631260991, "signal/frontier_aurc_reward/group_std_mean": 0.004359624674543738, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.827278316952288e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.827278316952288e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1325247272849083, "signal/frontier_coverage_1/group_std_mean": 0.1731933742761612, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023721925914287566, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023721925914287566, "signal/frontier_coverage_10/centered_abs_mean": 0.12130335420370102, "signal/frontier_coverage_10/group_std_mean": 0.15855235159397124, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002171329967677593, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002171329967677593, "signal/frontier_coverage_15/centered_abs_mean": 0.07543385475873947, "signal/frontier_coverage_15/group_std_mean": 0.09814363867044448, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013502659741789103, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013502659741789103, "signal/frontier_coverage_20/centered_abs_mean": 0.05862127542495728, "signal/frontier_coverage_20/group_std_mean": 0.07496060281991959, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010493207955732942, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010493207955732942, "signal/frontier_coverage_25/centered_abs_mean": 0.08424094766378402, "signal/frontier_coverage_25/group_std_mean": 0.10983462929725647, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015079128555953504, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015079128555953504, "signal/frontier_coverage_5/centered_abs_mean": 0.1325247272849083, "signal/frontier_coverage_5/group_std_mean": 0.1731933742761612, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023721925914287566, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023721925914287566, "signal/frontier_ece_reward/centered_abs_mean": 0.0032404222991317512, "signal/frontier_ece_reward/group_std_mean": 0.0042282075621187685, "signal/frontier_ece_reward/group_zero_std_frac": 0.025, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004050527873914689, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004050527873914689, "step": 275 }, { "calibration/aurc": 0.2298308327612087, "calibration/batch_distribution_entropy": 0.857929066616121, "calibration/buffer_distribution_entropy": 0.8941218456625043, "calibration/confidence_entropy": 0.3534088761944342, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.312109375, "calibration/coverage@15%": 0.409375, "calibration/coverage@20%": 0.5015625, "calibration/coverage@25%": 0.56640625, "calibration/coverage@30%": 0.671484375, "calibration/coverage@5%": 0.21875, "calibration/ece": 0.13779791596429622, "calibration/mean_confidence": 0.607771240265492, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 455.2, "completions/max_terminated_length": 455.2, "completions/mean_length": 181.246484375, "completions/mean_terminated_length": 181.246484375, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 0.896, "grad_norm": 0.0008350891876034439, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 938785250.0, "reward": 1.0364571571350099, "reward_std": 0.06273685097694397, "rewards/accuracy_reward": 0.60380859375, "rewards/brier_reward": 0.8282987594604492, "rewards/confidence_uniqueness_reward": 0.9447072267532348, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0020865506259724496, "rewards/frontier_coverage_1": 0.12943050265312195, "rewards/frontier_coverage_10": 0.11967690885066987, "rewards/frontier_coverage_15": 0.08408873230218887, "rewards/frontier_coverage_20": 0.08248092979192734, "rewards/frontier_coverage_25": 0.15849037170410157, "rewards/frontier_coverage_5": 0.12943050265312195, "rewards/frontier_ece_reward": 0.0033508573193103074, "signal/accuracy_reward/centered_abs_mean": 0.080902099609375, "signal/accuracy_reward/group_std_mean": 0.1085489347577095, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0404510498046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0404510498046875, "signal/advantage_abs_mean": 0.046602561324834826, "signal/advantage_pre_scale_abs_mean": 0.046602561324834826, "signal/advantage_pre_scale_std": 0.09552292376756669, "signal/advantage_std": 0.09552292376756669, "signal/brier_reward/centered_abs_mean": 0.10379516333341599, "signal/brier_reward/group_std_mean": 0.13422942757606507, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012974395416676998, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012974395416676998, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024189457297325134, "signal/confidence_uniqueness_reward/group_std_mean": 0.03062896504998207, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003023682162165642, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003023682162165642, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0017642589285969735, "signal/frontier_aurc_reward/group_std_mean": 0.002880441676825285, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.158023464493454e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.158023464493454e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13164688944816588, "signal/frontier_coverage_1/group_std_mean": 0.17034226059913635, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023564792238175867, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023564792238175867, "signal/frontier_coverage_10/centered_abs_mean": 0.11969798952341079, "signal/frontier_coverage_10/group_std_mean": 0.15487854182720184, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021425940096378325, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021425940096378325, "signal/frontier_coverage_15/centered_abs_mean": 0.07383271306753159, "signal/frontier_coverage_15/group_std_mean": 0.0950731098651886, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013216054998338223, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013216054998338223, "signal/frontier_coverage_20/centered_abs_mean": 0.05718918889760971, "signal/frontier_coverage_20/group_std_mean": 0.0726585105061531, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010236864443868398, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010236864443868398, "signal/frontier_coverage_25/centered_abs_mean": 0.08015549033880234, "signal/frontier_coverage_25/group_std_mean": 0.10447021871805191, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014347832417115568, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014347832417115568, "signal/frontier_coverage_5/centered_abs_mean": 0.13164688944816588, "signal/frontier_coverage_5/group_std_mean": 0.17034226059913635, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023564792238175867, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023564792238175867, "signal/frontier_ece_reward/centered_abs_mean": 0.003003358468413353, "signal/frontier_ece_reward/group_std_mean": 0.003905038023367524, "signal/frontier_ece_reward/group_zero_std_frac": 0.040625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00037541980855166913, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00037541980855166913, "step": 280 }, { "calibration/aurc": 0.3031863683876841, "calibration/batch_distribution_entropy": 0.9113038282259535, "calibration/buffer_distribution_entropy": 0.8915214898997718, "calibration/confidence_entropy": 0.3954785856124127, "calibration/coverage@0%": 0.0125, "calibration/coverage@1%": 0.0125, "calibration/coverage@10%": 0.10590600538160469, "calibration/coverage@15%": 0.25557041952054793, "calibration/coverage@20%": 0.3657679488747554, "calibration/coverage@25%": 0.451737555039139, "calibration/coverage@30%": 0.556863074853229, "calibration/coverage@5%": 0.05626834637964775, "calibration/ece": 0.15038636450266335, "calibration/mean_confidence": 0.5439663131143553, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 725.0, "completions/max_terminated_length": 523.0, "completions/mean_length": 184.86611328125, "completions/mean_terminated_length": 184.7338653564453, "completions/min_length": 79.6, "completions/min_terminated_length": 79.6, "epoch": 0.912, "grad_norm": 0.000809013785328716, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 955729575.0, "reward": 1.0262568235397338, "reward_std": 0.06516167744994164, "rewards/accuracy_reward": 0.58349609375, "rewards/brier_reward": 0.8233543515205384, "rewards/confidence_uniqueness_reward": 0.9516326189041138, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0018006491474807263, "rewards/frontier_coverage_1": 0.13025247156620026, "rewards/frontier_coverage_10": 0.12138088643550873, "rewards/frontier_coverage_15": 0.08430371508002281, "rewards/frontier_coverage_20": 0.08004055321216583, "rewards/frontier_coverage_25": 0.1411813259124756, "rewards/frontier_coverage_5": 0.13025247156620026, "rewards/frontier_ece_reward": 0.0032947796396911146, "signal/accuracy_reward/centered_abs_mean": 0.077459716796875, "signal/accuracy_reward/group_std_mean": 0.10814465284347534, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0387298583984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0387298583984375, "signal/advantage_abs_mean": 0.047399114817380905, "signal/advantage_pre_scale_abs_mean": 0.047399114817380905, "signal/advantage_pre_scale_std": 0.09288787245750427, "signal/advantage_std": 0.09288787245750427, "signal/brier_reward/centered_abs_mean": 0.1129148319363594, "signal/brier_reward/group_std_mean": 0.14614371061325074, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014114353992044925, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014114353992044925, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0207143172621727, "signal/confidence_uniqueness_reward/group_std_mean": 0.026592843234539032, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025892896577715875, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025892896577715875, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014614747371524573, "signal/frontier_aurc_reward/group_std_mean": 0.0024610649794340133, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.6160396009800026e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.6160396009800026e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.145956414937973, "signal/frontier_coverage_1/group_std_mean": 0.1896394670009613, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026126197073608635, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026126197073608635, "signal/frontier_coverage_10/centered_abs_mean": 0.13401967734098436, "signal/frontier_coverage_10/group_std_mean": 0.17390194535255432, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023989521665498613, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023989521665498613, "signal/frontier_coverage_15/centered_abs_mean": 0.08284454345703125, "signal/frontier_coverage_15/group_std_mean": 0.10729445815086365, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014829172752797604, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014829172752797604, "signal/frontier_coverage_20/centered_abs_mean": 0.061051695793867114, "signal/frontier_coverage_20/group_std_mean": 0.07795014530420304, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010928253177553415, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010928253177553415, "signal/frontier_coverage_25/centered_abs_mean": 0.08121936470270157, "signal/frontier_coverage_25/group_std_mean": 0.10639394819736481, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014538265997543931, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014538265997543931, "signal/frontier_coverage_5/centered_abs_mean": 0.145956414937973, "signal/frontier_coverage_5/group_std_mean": 0.1896394670009613, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026126197073608635, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026126197073608635, "signal/frontier_ece_reward/centered_abs_mean": 0.0034357388503849506, "signal/frontier_ece_reward/group_std_mean": 0.0044421212747693065, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042946735629811883, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042946735629811883, "step": 285 }, { "calibration/aurc": 0.2553048750948764, "calibration/batch_distribution_entropy": 0.928002335596811, "calibration/buffer_distribution_entropy": 0.8940186257378736, "calibration/confidence_entropy": 0.4102037582335584, "calibration/coverage@0%": 0.022265625, "calibration/coverage@1%": 0.022265625, "calibration/coverage@10%": 0.240234375, "calibration/coverage@15%": 0.29453125, "calibration/coverage@20%": 0.359375, "calibration/coverage@25%": 0.472265625, "calibration/coverage@30%": 0.57890625, "calibration/coverage@5%": 0.080078125, "calibration/ece": 0.11796843240314943, "calibration/mean_confidence": 0.5718039136867346, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 514.2, "completions/max_terminated_length": 514.2, "completions/mean_length": 181.9462890625, "completions/mean_terminated_length": 181.9462890625, "completions/min_length": 86.6, "completions/min_terminated_length": 86.6, "epoch": 0.928, "grad_norm": 0.0008436237112618983, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 972619521.0, "reward": 1.0353510141372682, "reward_std": 0.06413244009017945, "rewards/accuracy_reward": 0.606640625, "rewards/brier_reward": 0.816413962841034, "rewards/confidence_uniqueness_reward": 0.9475692749023438, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0016956059262156487, "rewards/frontier_coverage_1": 0.1107865646481514, "rewards/frontier_coverage_10": 0.10246082991361619, "rewards/frontier_coverage_15": 0.07582455202937126, "rewards/frontier_coverage_20": 0.07762015908956528, "rewards/frontier_coverage_25": 0.14767933785915374, "rewards/frontier_coverage_5": 0.1107865646481514, "rewards/frontier_ece_reward": 0.0029826680198311805, "signal/accuracy_reward/centered_abs_mean": 0.08046875, "signal/accuracy_reward/group_std_mean": 0.10661050379276275, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.040234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.040234375, "signal/advantage_abs_mean": 0.04859066754579544, "signal/advantage_pre_scale_abs_mean": 0.04859066754579544, "signal/advantage_pre_scale_std": 0.0963394895195961, "signal/advantage_std": 0.0963394895195961, "signal/brier_reward/centered_abs_mean": 0.1108618676662445, "signal/brier_reward/group_std_mean": 0.14311706721782685, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013857733458280563, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013857733458280563, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023229575157165526, "signal/confidence_uniqueness_reward/group_std_mean": 0.029777427762746812, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029036968946456907, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029036968946456907, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0015811802353709937, "signal/frontier_aurc_reward/group_std_mean": 0.0027030047960579394, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.8303124054218642e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.8303124054218642e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14058441817760467, "signal/frontier_coverage_1/group_std_mean": 0.1819360226392746, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025164610240608455, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025164610240608455, "signal/frontier_coverage_10/centered_abs_mean": 0.12620791643857956, "signal/frontier_coverage_10/group_std_mean": 0.1633853554725647, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022591216024011374, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022591216024011374, "signal/frontier_coverage_15/centered_abs_mean": 0.08150058835744858, "signal/frontier_coverage_15/group_std_mean": 0.10502809584140778, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014588604914024471, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014588604914024471, "signal/frontier_coverage_20/centered_abs_mean": 0.06125093549489975, "signal/frontier_coverage_20/group_std_mean": 0.07751076966524124, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010963917477056385, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010963917477056385, "signal/frontier_coverage_25/centered_abs_mean": 0.08199481666088104, "signal/frontier_coverage_25/group_std_mean": 0.10610374063253403, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014677071943879128, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014677071943879128, "signal/frontier_coverage_5/centered_abs_mean": 0.14058441817760467, "signal/frontier_coverage_5/group_std_mean": 0.1819360226392746, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025164610240608455, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025164610240608455, "signal/frontier_ece_reward/centered_abs_mean": 0.003389831865206361, "signal/frontier_ece_reward/group_std_mean": 0.004321504570543766, "signal/frontier_ece_reward/group_zero_std_frac": 0.021875, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004237289831507951, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004237289831507951, "step": 290 }, { "calibration/aurc": 0.19053175355084856, "calibration/batch_distribution_entropy": 0.9305434608784869, "calibration/buffer_distribution_entropy": 0.8955746984723145, "calibration/confidence_entropy": 0.40345241743169813, "calibration/coverage@0%": 0.092578125, "calibration/coverage@1%": 0.155078125, "calibration/coverage@10%": 0.2984375, "calibration/coverage@15%": 0.404296875, "calibration/coverage@20%": 0.55546875, "calibration/coverage@25%": 0.697265625, "calibration/coverage@30%": 0.80078125, "calibration/coverage@5%": 0.1890625, "calibration/ece": 0.10843311694105173, "calibration/mean_confidence": 0.5410684455589483, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 479.0, "completions/max_terminated_length": 479.0, "completions/mean_length": 182.3708984375, "completions/mean_terminated_length": 182.3708984375, "completions/min_length": 88.2, "completions/min_terminated_length": 88.2, "epoch": 0.944, "grad_norm": 0.000993796857073903, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 989462423.0, "reward": 1.0378493785858154, "reward_std": 0.07301433905959129, "rewards/accuracy_reward": 0.6052734375, "rewards/brier_reward": 0.8297587871551514, "rewards/confidence_uniqueness_reward": 0.9487686157226562, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0013114425935782492, "rewards/frontier_coverage_1": 0.13030335307121277, "rewards/frontier_coverage_10": 0.12149370610713958, "rewards/frontier_coverage_15": 0.08871242925524711, "rewards/frontier_coverage_20": 0.08486142754554749, "rewards/frontier_coverage_25": 0.14197321832180024, "rewards/frontier_coverage_5": 0.13030335307121277, "rewards/frontier_ece_reward": 0.003458545543253422, "signal/accuracy_reward/centered_abs_mean": 0.10616455078125, "signal/accuracy_reward/group_std_mean": 0.13756768852472306, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053082275390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.053082275390625, "signal/advantage_abs_mean": 0.056171053647994997, "signal/advantage_pre_scale_abs_mean": 0.056171053647994997, "signal/advantage_pre_scale_std": 0.10713197886943818, "signal/advantage_std": 0.10713197886943818, "signal/brier_reward/centered_abs_mean": 0.10598112493753434, "signal/brier_reward/group_std_mean": 0.1365533709526062, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013247640617191792, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013247640617191792, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022769904136657713, "signal/confidence_uniqueness_reward/group_std_mean": 0.02863166332244873, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002846238017082214, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002846238017082214, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0011110721388831735, "signal/frontier_aurc_reward/group_std_mean": 0.0018645315431058407, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 1.9888190217898226e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 1.9888190217898226e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14970411360263824, "signal/frontier_coverage_1/group_std_mean": 0.195058611035347, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026797034312039613, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026797034312039613, "signal/frontier_coverage_10/centered_abs_mean": 0.13364054411649703, "signal/frontier_coverage_10/group_std_mean": 0.17438722848892213, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0023921656422317026, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023921656422317026, "signal/frontier_coverage_15/centered_abs_mean": 0.08324484527111053, "signal/frontier_coverage_15/group_std_mean": 0.10912428945302963, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014900827081874013, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014900827081874013, "signal/frontier_coverage_20/centered_abs_mean": 0.05944142565131187, "signal/frontier_coverage_20/group_std_mean": 0.07634605765342713, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010640014894306659, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010640014894306659, "signal/frontier_coverage_25/centered_abs_mean": 0.07600450217723846, "signal/frontier_coverage_25/group_std_mean": 0.09856143593788147, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001360480533912778, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001360480533912778, "signal/frontier_coverage_5/centered_abs_mean": 0.14970411360263824, "signal/frontier_coverage_5/group_std_mean": 0.195058611035347, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026797034312039613, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026797034312039613, "signal/frontier_ece_reward/centered_abs_mean": 0.0033906072843819858, "signal/frontier_ece_reward/group_std_mean": 0.004434131644666195, "signal/frontier_ece_reward/group_zero_std_frac": 0.028125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0004238259105477482, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0004238259105477482, "step": 295 }, { "calibration/aurc": 0.20952501494560322, "calibration/batch_distribution_entropy": 0.8843125473748732, "calibration/buffer_distribution_entropy": 0.8962001707723987, "calibration/confidence_entropy": 0.36058761090983166, "calibration/coverage@0%": 0.087890625, "calibration/coverage@1%": 0.087890625, "calibration/coverage@10%": 0.262109375, "calibration/coverage@15%": 0.43671875, "calibration/coverage@20%": 0.575, "calibration/coverage@25%": 0.6765625, "calibration/coverage@30%": 0.759375, "calibration/coverage@5%": 0.18125, "calibration/ece": 0.13082384829872193, "calibration/mean_confidence": 0.571152714201278, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 449.2, "completions/max_terminated_length": 449.2, "completions/mean_length": 179.28154296875, "completions/mean_terminated_length": 179.28154296875, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.96, "grad_norm": 0.0006647381815128028, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 1006238586.0, "reward": 1.0329123735427856, "reward_std": 0.05366669148206711, "rewards/accuracy_reward": 0.58798828125, "rewards/brier_reward": 0.8446584582328797, "rewards/confidence_uniqueness_reward": 0.9450820922851563, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0017721342155709863, "rewards/frontier_coverage_1": 0.15945130288600923, "rewards/frontier_coverage_10": 0.14589085876941682, "rewards/frontier_coverage_15": 0.10300841629505157, "rewards/frontier_coverage_20": 0.09736352860927582, "rewards/frontier_coverage_25": 0.15814386010169984, "rewards/frontier_coverage_5": 0.15945130288600923, "rewards/frontier_ece_reward": 0.003961241897195577, "signal/accuracy_reward/centered_abs_mean": 0.071124267578125, "signal/accuracy_reward/group_std_mean": 0.09625100940465928, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0355621337890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0355621337890625, "signal/advantage_abs_mean": 0.039787986874580385, "signal/advantage_pre_scale_abs_mean": 0.039787986874580385, "signal/advantage_pre_scale_std": 0.08424456864595413, "signal/advantage_std": 0.08424456864595413, "signal/brier_reward/centered_abs_mean": 0.09390641152858734, "signal/brier_reward/group_std_mean": 0.12350601404905319, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011738301441073417, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011738301441073417, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02469801902770996, "signal/confidence_uniqueness_reward/group_std_mean": 0.031055227667093278, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003087252378463745, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003087252378463745, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0014515453251078725, "signal/frontier_aurc_reward/group_std_mean": 0.0023173499619588258, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.5982661463785915e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.5982661463785915e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1336147144436836, "signal/frontier_coverage_1/group_std_mean": 0.1740594267845154, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002391703147441149, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002391703147441149, "signal/frontier_coverage_10/centered_abs_mean": 0.11904115676879883, "signal/frontier_coverage_10/group_std_mean": 0.15523334443569184, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002130836620926857, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002130836620926857, "signal/frontier_coverage_15/centered_abs_mean": 0.07668739408254624, "signal/frontier_coverage_15/group_std_mean": 0.09952570647001266, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013727043056860565, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013727043056860565, "signal/frontier_coverage_20/centered_abs_mean": 0.05882178023457527, "signal/frontier_coverage_20/group_std_mean": 0.07488873153924942, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010529098566621543, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010529098566621543, "signal/frontier_coverage_25/centered_abs_mean": 0.07149278298020363, "signal/frontier_coverage_25/group_std_mean": 0.09316664934158325, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001279720780439675, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001279720780439675, "signal/frontier_coverage_5/centered_abs_mean": 0.1336147144436836, "signal/frontier_coverage_5/group_std_mean": 0.1740594267845154, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002391703147441149, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002391703147441149, "signal/frontier_ece_reward/centered_abs_mean": 0.0030738627538084984, "signal/frontier_ece_reward/group_std_mean": 0.0040326244197785854, "signal/frontier_ece_reward/group_zero_std_frac": 0.03125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0003842328442260623, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0003842328442260623, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.45877331809546285, "eval_calibration/batch_distribution_entropy": 0.8487918639736909, "eval_calibration/buffer_distribution_entropy": 0.8943828101486132, "eval_calibration/confidence_entropy": 0.37301273139280466, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.03125, "eval_calibration/coverage@15%": 0.0625, "eval_calibration/coverage@20%": 0.1953125, "eval_calibration/coverage@25%": 0.234375, "eval_calibration/coverage@30%": 0.3046875, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.20022656249999998, "eval_calibration/mean_confidence": 0.47538281250000003, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 385.5, "eval_completions/max_terminated_length": 385.5, "eval_completions/mean_length": 177.91608428955078, "eval_completions/mean_terminated_length": 177.91608428955078, "eval_completions/min_length": 96.5, "eval_completions/min_terminated_length": 96.5, "eval_loss": 0.0, "eval_num_tokens": 1006238586.0, "eval_reward": 0.9505706876516342, "eval_reward_std": 0.24583137407898903, "eval_rewards/accuracy_reward": 0.447265625, "eval_rewards/brier_reward": 0.7869907170534134, "eval_rewards/confidence_uniqueness_reward": 0.898681640625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0033371542813256383, "eval_rewards/frontier_coverage_1": 0.20748823508620262, "eval_rewards/frontier_coverage_10": 0.1859576664865017, "eval_rewards/frontier_coverage_15": 0.12141189724206924, "eval_rewards/frontier_coverage_20": 0.08090419881045818, "eval_rewards/frontier_coverage_25": 0.07952974922955036, "eval_rewards/frontier_coverage_5": 0.20748823508620262, "eval_rewards/frontier_ece_reward": 0.0038945103879086673, "eval_runtime": 20.0315, "eval_samples_per_second": 24.961, "eval_signal/accuracy_reward/centered_abs_mean": 0.4803466796875, "eval_signal/accuracy_reward/group_std_mean": 0.4976552575826645, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.24017333984375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.24017333984375, "eval_signal/advantage_abs_mean": 0.23006105422973633, "eval_signal/advantage_pre_scale_abs_mean": 0.23006105422973633, "eval_signal/advantage_pre_scale_std": 0.24323223158717155, "eval_signal/advantage_std": 0.24323223158717155, "eval_signal/brier_reward/centered_abs_mean": 0.23864521458745003, "eval_signal/brier_reward/group_std_mean": 0.295004665851593, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.029830651823431253, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.029830651823431253, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.044342041015625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05374839436262846, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005542755126953125, "eval_signal/confidence_uniqueness_reward/weight": 0.125, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005542755126953125, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004506968369241804, "eval_signal/frontier_aurc_reward/group_std_mean": 0.009769670432433486, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.06747302704025e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.06747302704025e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.37909433990716934, "eval_signal/frontier_coverage_1/group_std_mean": 0.46202613413333893, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00678578857332468, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00678578857332468, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3334348201751709, "eval_signal/frontier_coverage_10/group_std_mean": 0.40698229521512985, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005968482932075858, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005968482932075858, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.19724000990390778, "eval_signal/frontier_coverage_15/group_std_mean": 0.24512441456317902, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003530596033670008, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003530596033670008, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.12327801994979382, "eval_signal/frontier_coverage_20/group_std_mean": 0.1503501832485199, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0022066764649935067, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022066764649935067, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2011748030781746, "eval_signal/frontier_coverage_25/group_std_mean": 0.2626011222600937, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0036010288167744875, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0036010288167744875, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.37909433990716934, "eval_signal/frontier_coverage_5/group_std_mean": 0.46202613413333893, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00678578857332468, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00678578857332468, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.006420767167583108, "eval_signal/frontier_ece_reward/group_std_mean": 0.00843157060444355, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008025958959478885, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008025958959478885, "eval_steps_per_second": 0.2, "step": 300 }, { "epoch": 0.96, "step": 300, "train_probe_calibration/aurc": 0.11334329991864908, "train_probe_calibration/batch_distribution_entropy": 0.823814016063535, "train_probe_calibration/buffer_distribution_entropy": 0.8939561434753694, "train_probe_calibration/confidence_entropy": 0.3812485580477293, "train_probe_calibration/coverage@0%": 0.3515625, "train_probe_calibration/coverage@1%": 0.3515625, "train_probe_calibration/coverage@10%": 0.6171875, "train_probe_calibration/coverage@15%": 0.71875, "train_probe_calibration/coverage@20%": 0.7890625, "train_probe_calibration/coverage@25%": 0.8671875, "train_probe_calibration/coverage@30%": 0.921875, "train_probe_calibration/coverage@5%": 0.359375, "train_probe_calibration/ece": 0.15804687500000003, "train_probe_calibration/mean_confidence": 0.590703125, "train_probe_completions/clipped_ratio": 0.001953125, "train_probe_completions/max_length": 607.0, "train_probe_completions/max_terminated_length": 316.0, "train_probe_completions/mean_length": 175.61072158813477, "train_probe_completions/mean_terminated_length": 172.95431900024414, "train_probe_completions/min_length": 94.75, "train_probe_completions/min_terminated_length": 94.75, "train_probe_loss": 0.0, "train_probe_num_tokens": 1006238586.0, "train_probe_reward": 1.0620156228542328, "train_probe_reward_std": 0.22856702283024788, "train_probe_rewards/accuracy_reward": 0.66015625, "train_probe_rewards/brier_reward": 0.8613701313734055, "train_probe_rewards/confidence_uniqueness_reward": 0.8939720988273621, "train_probe_rewards/format_reward": 0.998046875, "train_probe_rewards/frontier_aurc_reward": -0.0012163210631115362, "train_probe_rewards/frontier_coverage_1": 0.12271312065422535, "train_probe_rewards/frontier_coverage_10": 0.11114241741597652, "train_probe_rewards/frontier_coverage_15": 0.0873615425080061, "train_probe_rewards/frontier_coverage_20": 0.09772194363176823, "train_probe_rewards/frontier_coverage_25": 0.18827218934893608, "train_probe_rewards/frontier_coverage_5": 0.12271312065422535, "train_probe_rewards/frontier_ece_reward": 0.003619219409301877, "train_probe_runtime": 25.9257, "train_probe_samples_per_second": 19.286, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.440185546875, "train_probe_signal/accuracy_reward/group_std_mean": 0.4761292338371277, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.2200927734375, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2200927734375, "train_probe_signal/advantage_abs_mean": 0.2054794505238533, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2054794505238533, "train_probe_signal/advantage_pre_scale_std": 0.22665054351091385, "train_probe_signal/advantage_std": 0.22665054351091385, "train_probe_signal/brier_reward/centered_abs_mean": 0.1662147231400013, "train_probe_signal/brier_reward/group_std_mean": 0.23011131957173347, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020776840392500162, "train_probe_signal/brier_reward/weight": 0.125, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.020776840392500162, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.043102139607071877, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.054641361348330975, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0053877674508839846, "train_probe_signal/confidence_uniqueness_reward/weight": 0.125, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0053877674508839846, "train_probe_signal/format_reward/centered_abs_mean": 0.0037841796875, "train_probe_signal/format_reward/group_std_mean": 0.011048543266952038, "train_probe_signal/format_reward/group_zero_std_frac": 0.9375, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "train_probe_signal/frontier_aurc_reward/centered_abs_mean": 0.002128588006598875, "train_probe_signal/frontier_aurc_reward/group_std_mean": 0.004736322327516973, "train_probe_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.81017252948368e-05, "train_probe_signal/frontier_aurc_reward/weight": 0.017899999395012856, "train_probe_signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.81017252948368e-05, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.33944354206323624, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.44217299669981003, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006076039047911763, "train_probe_signal/frontier_coverage_1/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006076039047911763, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.2949482724070549, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.38730061054229736, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005279573961161077, "train_probe_signal/frontier_coverage_10/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005279573961161077, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.16588661447167397, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.2280319258570671, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002969370281789452, "train_probe_signal/frontier_coverage_15/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002969370281789452, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.10244773887097836, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.12942470982670784, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018338145164307207, "train_probe_signal/frontier_coverage_20/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018338145164307207, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.1977926529943943, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.23229693248867989, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003540488425642252, "train_probe_signal/frontier_coverage_25/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003540488425642252, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.33944354206323624, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.44217299669981003, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006076039047911763, "train_probe_signal/frontier_coverage_5/weight": 0.017899999395012856, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006076039047911763, "train_probe_signal/frontier_ece_reward/centered_abs_mean": 0.005502797896042466, "train_probe_signal/frontier_ece_reward/group_std_mean": 0.0075735143618658185, "train_probe_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "train_probe_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006878497370053083, "train_probe_signal/frontier_ece_reward/weight": 0.125, "train_probe_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006878497370053083, "train_probe_steps_per_second": 0.154 }, { "calibration/aurc": 0.18582277411444578, "calibration/batch_distribution_entropy": 0.9112480015757871, "calibration/buffer_distribution_entropy": 0.8951250339277783, "calibration/confidence_entropy": 0.39676409676776897, "calibration/coverage@0%": 0.08125, "calibration/coverage@1%": 0.1515625, "calibration/coverage@10%": 0.422265625, "calibration/coverage@15%": 0.4765625, "calibration/coverage@20%": 0.548046875, "calibration/coverage@25%": 0.61015625, "calibration/coverage@30%": 0.67890625, "calibration/coverage@5%": 0.338671875, "calibration/ece": 0.1494024143555333, "calibration/mean_confidence": 0.5677664443004726, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 697.6, "completions/max_terminated_length": 533.2, "completions/mean_length": 178.14248046875, "completions/mean_terminated_length": 178.00925903320314, "completions/min_length": 82.6, "completions/min_terminated_length": 82.6, "epoch": 0.976, "grad_norm": 0.0008968439069576561, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1022923885.0, "reward": 1.0480751514434814, "reward_std": 0.05989357978105545, "rewards/accuracy_reward": 0.62490234375, "rewards/brier_reward": 0.8356807827949524, "rewards/confidence_uniqueness_reward": 0.9470009803771973, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.001478810131084174, "rewards/frontier_coverage_1": 0.12298977077007293, "rewards/frontier_coverage_10": 0.1169760562479496, "rewards/frontier_coverage_15": 0.08561454713344574, "rewards/frontier_coverage_20": 0.09092361256480216, "rewards/frontier_coverage_25": 0.16219930350780487, "rewards/frontier_coverage_5": 0.12298977077007293, "rewards/frontier_ece_reward": 0.0032111145555973053, "signal/accuracy_reward/centered_abs_mean": 0.077911376953125, "signal/accuracy_reward/group_std_mean": 0.10675206631422043, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0389556884765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0389556884765625, "signal/advantage_abs_mean": 0.043699586391448976, "signal/advantage_pre_scale_abs_mean": 0.043699586391448976, "signal/advantage_pre_scale_std": 0.08926723450422287, "signal/advantage_std": 0.08926723450422287, "signal/brier_reward/centered_abs_mean": 0.096511709690094, "signal/brier_reward/group_std_mean": 0.12847652584314345, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01206396371126175, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01206396371126175, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02388366758823395, "signal/confidence_uniqueness_reward/group_std_mean": 0.03034769296646118, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029854584485292436, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029854584485292436, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002777099609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012161832652054726, "signal/frontier_aurc_reward/group_std_mean": 0.0019433848559856416, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.1769678642158397e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.1769678642158397e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13997844159603118, "signal/frontier_coverage_1/group_std_mean": 0.18497555553913117, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002505614003166556, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002505614003166556, "signal/frontier_coverage_10/centered_abs_mean": 0.12051929384469987, "signal/frontier_coverage_10/group_std_mean": 0.15961622595787048, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021572952857241033, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021572952857241033, "signal/frontier_coverage_15/centered_abs_mean": 0.07593502700328827, "signal/frontier_coverage_15/group_std_mean": 0.10026619136333466, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013592369155958294, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013592369155958294, "signal/frontier_coverage_20/centered_abs_mean": 0.05694276541471481, "signal/frontier_coverage_20/group_std_mean": 0.07313971668481827, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010192754562012851, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010192754562012851, "signal/frontier_coverage_25/centered_abs_mean": 0.07261455804109573, "signal/frontier_coverage_25/group_std_mean": 0.09487131386995315, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012998004909604787, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012998004909604787, "signal/frontier_coverage_5/centered_abs_mean": 0.13997844159603118, "signal/frontier_coverage_5/group_std_mean": 0.18497555553913117, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002505614003166556, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002505614003166556, "signal/frontier_ece_reward/centered_abs_mean": 0.003159593231976032, "signal/frontier_ece_reward/group_std_mean": 0.004189403681084514, "signal/frontier_ece_reward/group_zero_std_frac": 0.04375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000394949153997004, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000394949153997004, "step": 305 }, { "calibration/aurc": 0.3200527470519939, "calibration/batch_distribution_entropy": 0.8959806922728791, "calibration/buffer_distribution_entropy": 0.8960747979400654, "calibration/confidence_entropy": 0.3658938308584535, "calibration/coverage@0%": 0.014453125, "calibration/coverage@1%": 0.014453125, "calibration/coverage@10%": 0.043359375, "calibration/coverage@15%": 0.11328125, "calibration/coverage@20%": 0.298828125, "calibration/coverage@25%": 0.423828125, "calibration/coverage@30%": 0.569921875, "calibration/coverage@5%": 0.019140625, "calibration/ece": 0.16820631052342688, "calibration/mean_confidence": 0.5135351632988238, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 674.2, "completions/max_terminated_length": 451.6, "completions/mean_length": 174.34384765625, "completions/mean_terminated_length": 174.21048278808593, "completions/min_length": 79.2, "completions/min_terminated_length": 79.2, "epoch": 0.992, "grad_norm": 0.0009708595462143421, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1039837646.0, "reward": 1.015864658355713, "reward_std": 0.0644782729446888, "rewards/accuracy_reward": 0.56572265625, "rewards/brier_reward": 0.8120604991912842, "rewards/confidence_uniqueness_reward": 0.9418984651565552, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002516076737083495, "rewards/frontier_coverage_1": 0.14565924555063248, "rewards/frontier_coverage_10": 0.1338302969932556, "rewards/frontier_coverage_15": 0.09351640939712524, "rewards/frontier_coverage_20": 0.0904716819524765, "rewards/frontier_coverage_25": 0.14130311608314514, "rewards/frontier_coverage_5": 0.14565924555063248, "rewards/frontier_ece_reward": 0.003355812141671777, "signal/accuracy_reward/centered_abs_mean": 0.088238525390625, "signal/accuracy_reward/group_std_mean": 0.11467040479183196, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0441192626953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0441192626953125, "signal/advantage_abs_mean": 0.04950515627861023, "signal/advantage_pre_scale_abs_mean": 0.04950515627861023, "signal/advantage_pre_scale_std": 0.09848933815956115, "signal/advantage_std": 0.09848933815956115, "signal/brier_reward/centered_abs_mean": 0.10926359742879868, "signal/brier_reward/group_std_mean": 0.13695080131292342, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013657949678599835, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013657949678599835, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025544070824980735, "signal/confidence_uniqueness_reward/group_std_mean": 0.032745585590600965, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003193008853122592, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003193008853122592, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021328864386305213, "signal/frontier_aurc_reward/group_std_mean": 0.003304897760972381, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.817866781901103e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.817866781901103e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14268429577350616, "signal/frontier_coverage_1/group_std_mean": 0.1820806473493576, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025540488539263608, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025540488539263608, "signal/frontier_coverage_10/centered_abs_mean": 0.12527389973402023, "signal/frontier_coverage_10/group_std_mean": 0.15980836749076843, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022424027556553483, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022424027556553483, "signal/frontier_coverage_15/centered_abs_mean": 0.07919367253780366, "signal/frontier_coverage_15/group_std_mean": 0.10084569156169891, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014175667194649578, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014175667194649578, "signal/frontier_coverage_20/centered_abs_mean": 0.061572205275297165, "signal/frontier_coverage_20/group_std_mean": 0.0771061822772026, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011021424317732454, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011021424317732454, "signal/frontier_coverage_25/centered_abs_mean": 0.07973235845565796, "signal/frontier_coverage_25/group_std_mean": 0.10166804194450378, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014272091211751103, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014272091211751103, "signal/frontier_coverage_5/centered_abs_mean": 0.14268429577350616, "signal/frontier_coverage_5/group_std_mean": 0.1820806473493576, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025540488539263608, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025540488539263608, "signal/frontier_ece_reward/centered_abs_mean": 0.003300653723999858, "signal/frontier_ece_reward/group_std_mean": 0.004214685643091798, "signal/frontier_ece_reward/group_zero_std_frac": 0.04375, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00041258171549998226, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00041258171549998226, "step": 310 }, { "calibration/aurc": 0.1585634225548857, "calibration/batch_distribution_entropy": 0.8018340917186807, "calibration/buffer_distribution_entropy": 0.8974739698135898, "calibration/confidence_entropy": 0.32092406024176, "calibration/coverage@0%": 0.1025390625, "calibration/coverage@1%": 0.119140625, "calibration/coverage@10%": 0.3955078125, "calibration/coverage@15%": 0.5185546875, "calibration/coverage@20%": 0.6552734375, "calibration/coverage@25%": 0.78515625, "calibration/coverage@30%": 0.83984375, "calibration/coverage@5%": 0.2958984375, "calibration/ece": 0.16362565962695846, "calibration/mean_confidence": 0.6704694096269584, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.0, "completions/max_terminated_length": 405.0, "completions/mean_length": 172.0516586303711, "completions/mean_terminated_length": 172.0516586303711, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.9984, "num_tokens": 1046548430.0, "reward": 1.0416707396507263, "reward_std": 0.0726642906665802, "rewards/accuracy_reward": 0.6279296875, "rewards/brier_reward": 0.7980144023895264, "rewards/confidence_uniqueness_reward": 0.9470119476318359, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0022485224180854857, "rewards/frontier_coverage_1": 0.08259440585970879, "rewards/frontier_coverage_10": 0.07215217500925064, "rewards/frontier_coverage_15": 0.059676751494407654, "rewards/frontier_coverage_20": 0.07507448270916939, "rewards/frontier_coverage_25": 0.14635684341192245, "rewards/frontier_coverage_5": 0.08259440585970879, "rewards/frontier_ece_reward": 0.002701267832890153, "signal/accuracy_reward/centered_abs_mean": 0.085662841796875, "signal/accuracy_reward/group_std_mean": 0.12225553393363953, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0428314208984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0428314208984375, "signal/advantage_abs_mean": 0.0527034904807806, "signal/advantage_pre_scale_abs_mean": 0.0527034904807806, "signal/advantage_pre_scale_std": 0.10466087237000465, "signal/advantage_std": 0.10466087237000465, "signal/brier_reward/centered_abs_mean": 0.11700525507330894, "signal/brier_reward/group_std_mean": 0.15130788832902908, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014625656884163618, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014625656884163618, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023126959800720215, "signal/confidence_uniqueness_reward/group_std_mean": 0.02852536365389824, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002890869975090027, "signal/confidence_uniqueness_reward/weight": 0.125, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002890869975090027, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.00226385158021003, "signal/frontier_aurc_reward/group_std_mean": 0.0038065230473876, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.052294389111921e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.052294389111921e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13711658865213394, "signal/frontier_coverage_1/group_std_mean": 0.1796187162399292, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024543870240449905, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024543870240449905, "signal/frontier_coverage_10/centered_abs_mean": 0.1192222610116005, "signal/frontier_coverage_10/group_std_mean": 0.1569937914609909, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002134078531526029, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002134078531526029, "signal/frontier_coverage_15/centered_abs_mean": 0.07506273686885834, "signal/frontier_coverage_15/group_std_mean": 0.09861153736710548, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013436229201033711, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013436229201033711, "signal/frontier_coverage_20/centered_abs_mean": 0.058869652450084686, "signal/frontier_coverage_20/group_std_mean": 0.07617875188589096, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010537666967138648, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010537666967138648, "signal/frontier_coverage_25/centered_abs_mean": 0.08716562017798424, "signal/frontier_coverage_25/group_std_mean": 0.11405183747410774, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015602644998580217, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015602644998580217, "signal/frontier_coverage_5/centered_abs_mean": 0.13711658865213394, "signal/frontier_coverage_5/group_std_mean": 0.1796187162399292, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024543870240449905, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024543870240449905, "signal/frontier_ece_reward/centered_abs_mean": 0.003408772055990994, "signal/frontier_ece_reward/group_std_mean": 0.004509588470682502, "signal/frontier_ece_reward/group_zero_std_frac": 0.0390625, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00042609650699887425, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00042609650699887425, "step": 312, "total_flos": 0.0, "train_loss": 0.00471675537865406, "train_runtime": 59905.1304, "train_samples_per_second": 0.334, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1046548430, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }