{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6224643780433982, "calibration/batch_distribution_entropy": 0.6544897379113672, "calibration/batch_entropy_100bins": 0.4863029185249278, "calibration/batch_entropy_10bins": 0.6544897379113672, "calibration/batch_entropy_50bins": 0.5701004408606952, "calibration/batch_uniqueness": 0.728444991952043, "calibration/confidence_entropy": 0.34767197334474165, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4841265820092806, "calibration/mean_confidence": 0.7931767989389904, "calibration/prompt_uniqueness": 0.6103076405494752, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0353515625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1506.0, "completions/mean_length": 271.31015625, "completions/mean_terminated_length": 224.96051940917968, "completions/min_length": 1.8, "completions/min_terminated_length": 1.8, "epoch": 0.016, "grad_norm": 0.053285811096429825, "learning_rate": 3.1249999999999997e-07, "loss": 0.0705, "num_tokens": 17622248.0, "reward": 0.49289684891700747, "reward_std": 0.3958591163158417, "rewards/accuracy_reward": 0.2240234375, "rewards/brier_reward": 0.37564998865127563, "rewards/format_reward": 0.67802734375, "rewards/frontier_aurc_reward": 0.3027165472507477, "rewards/frontier_coverage_1": 0.3027165472507477, "rewards/frontier_coverage_10": 0.3027165472507477, "rewards/frontier_coverage_15": 0.3027165472507477, "rewards/frontier_coverage_20": 0.3027165472507477, "rewards/frontier_coverage_25": 0.3027165472507477, "rewards/frontier_coverage_5": 0.3027165472507477, "rewards/frontier_ece_reward": 0.3027165472507477, "rewards/frontier_entropy_batch_reward": -0.6468378663063049, "signal/accuracy_reward/centered_abs_mean": 0.24012451171875, "signal/accuracy_reward/group_bin_occupancy": 0.210546875, "signal/accuracy_reward/group_std_mean": 0.2819916486740112, "signal/accuracy_reward/group_zero_std_frac": 0.315625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.120062255859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.120062255859375, "signal/advantage_abs_mean": 0.33822785019874574, "signal/advantage_pre_scale_abs_mean": 0.33822785019874574, "signal/advantage_pre_scale_std": 0.40998163223266604, "signal/advantage_std": 0.40998163223266604, "signal/brier_reward/centered_abs_mean": 0.320052570104599, "signal/brier_reward/group_bin_occupancy": 0.745703125, "signal/brier_reward/group_std_mean": 0.36434565782546996, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04000657126307487, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.04000657126307487, "signal/format_reward/centered_abs_mean": 0.408428955078125, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.45669829845428467, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2042144775390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.2042144775390625, "signal/frontier_aurc_reward/centered_abs_mean": 0.29173809885978697, "signal/frontier_aurc_reward/group_bin_occupancy": 0.662890625, "signal/frontier_aurc_reward/group_std_mean": 0.34154740571975706, "signal/frontier_aurc_reward/group_zero_std_frac": 0.003125, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_1/centered_abs_mean": 0.29173809885978697, "signal/frontier_coverage_1/group_bin_occupancy": 0.662890625, "signal/frontier_coverage_1/group_std_mean": 0.34154740571975706, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_10/centered_abs_mean": 0.29173809885978697, "signal/frontier_coverage_10/group_bin_occupancy": 0.662890625, "signal/frontier_coverage_10/group_std_mean": 0.34154740571975706, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_15/centered_abs_mean": 0.29173809885978697, "signal/frontier_coverage_15/group_bin_occupancy": 0.662890625, "signal/frontier_coverage_15/group_std_mean": 0.34154740571975706, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_20/centered_abs_mean": 0.29173809885978697, "signal/frontier_coverage_20/group_bin_occupancy": 0.662890625, "signal/frontier_coverage_20/group_std_mean": 0.34154740571975706, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_25/centered_abs_mean": 0.29173809885978697, "signal/frontier_coverage_25/group_bin_occupancy": 0.662890625, "signal/frontier_coverage_25/group_std_mean": 0.34154740571975706, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_5/centered_abs_mean": 0.29173809885978697, "signal/frontier_coverage_5/group_bin_occupancy": 0.662890625, "signal/frontier_coverage_5/group_std_mean": 0.34154740571975706, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005222111754119396, "signal/frontier_ece_reward/centered_abs_mean": 0.29173809885978697, "signal/frontier_ece_reward/group_bin_occupancy": 0.662890625, "signal/frontier_ece_reward/group_std_mean": 0.34154740571975706, "signal/frontier_ece_reward/group_zero_std_frac": 0.003125, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03646726235747337, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03646726235747337, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4318214237689972, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.308203125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4762145817279816, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05397767797112465, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05397767797112465, "step": 5 }, { "calibration/aurc": 0.6528287664498039, "calibration/batch_distribution_entropy": 0.6268190631519651, "calibration/batch_entropy_100bins": 0.4738420714903396, "calibration/batch_entropy_10bins": 0.6268190631519651, "calibration/batch_entropy_50bins": 0.554670162377654, "calibration/batch_uniqueness": 0.705548191421581, "calibration/confidence_entropy": 0.33243235077831834, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5101970205206909, "calibration/mean_confidence": 0.7973768305353158, "calibration/prompt_uniqueness": 0.5841085893747472, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.037109375, "completions/max_length": 1536.0, "completions/max_terminated_length": 1523.4, "completions/mean_length": 266.923828125, "completions/mean_terminated_length": 218.05512084960938, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.021607212722301483, "learning_rate": 6.249999999999999e-07, "loss": 0.0713, "num_tokens": 35455900.0, "reward": 0.5007035851478576, "reward_std": 0.378324830532074, "rewards/accuracy_reward": 0.2111328125, "rewards/brier_reward": 0.37633253931999205, "rewards/format_reward": 0.71708984375, "rewards/frontier_aurc_reward": 0.2987588942050934, "rewards/frontier_coverage_1": 0.2987588942050934, "rewards/frontier_coverage_10": 0.2987588942050934, "rewards/frontier_coverage_15": 0.2987588942050934, "rewards/frontier_coverage_20": 0.2987588942050934, "rewards/frontier_coverage_25": 0.2987588942050934, "rewards/frontier_coverage_5": 0.2987588942050934, "rewards/frontier_ece_reward": 0.2987588942050934, "rewards/frontier_entropy_batch_reward": -0.6818291902542114, "signal/accuracy_reward/centered_abs_mean": 0.22430419921875, "signal/accuracy_reward/group_bin_occupancy": 0.209765625, "signal/accuracy_reward/group_std_mean": 0.2700383305549622, "signal/accuracy_reward/group_zero_std_frac": 0.321875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.112152099609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.112152099609375, "signal/advantage_abs_mean": 0.3131078124046326, "signal/advantage_pre_scale_abs_mean": 0.3131078124046326, "signal/advantage_pre_scale_std": 0.3932444155216217, "signal/advantage_std": 0.3932444155216217, "signal/brier_reward/centered_abs_mean": 0.3091658055782318, "signal/brier_reward/group_bin_occupancy": 0.756640625, "signal/brier_reward/group_std_mean": 0.3574398994445801, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.038645725697278976, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.038645725697278976, "signal/format_reward/centered_abs_mean": 0.377227783203125, "signal/format_reward/group_bin_occupancy": 0.25, "signal/format_reward/group_std_mean": 0.43797464966773986, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1886138916015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1886138916015625, "signal/frontier_aurc_reward/centered_abs_mean": 0.2825876474380493, "signal/frontier_aurc_reward/group_bin_occupancy": 0.67109375, "signal/frontier_aurc_reward/group_std_mean": 0.33760352730751036, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_1/centered_abs_mean": 0.2825876474380493, "signal/frontier_coverage_1/group_bin_occupancy": 0.67109375, "signal/frontier_coverage_1/group_std_mean": 0.33760352730751036, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_10/centered_abs_mean": 0.2825876474380493, "signal/frontier_coverage_10/group_bin_occupancy": 0.67109375, "signal/frontier_coverage_10/group_std_mean": 0.33760352730751036, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_15/centered_abs_mean": 0.2825876474380493, "signal/frontier_coverage_15/group_bin_occupancy": 0.67109375, "signal/frontier_coverage_15/group_std_mean": 0.33760352730751036, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_20/centered_abs_mean": 0.2825876474380493, "signal/frontier_coverage_20/group_bin_occupancy": 0.67109375, "signal/frontier_coverage_20/group_std_mean": 0.33760352730751036, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_25/centered_abs_mean": 0.2825876474380493, "signal/frontier_coverage_25/group_bin_occupancy": 0.67109375, "signal/frontier_coverage_25/group_std_mean": 0.33760352730751036, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_5/centered_abs_mean": 0.2825876474380493, "signal/frontier_coverage_5/group_bin_occupancy": 0.67109375, "signal/frontier_coverage_5/group_std_mean": 0.33760352730751036, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005058318562805653, "signal/frontier_ece_reward/centered_abs_mean": 0.2825876474380493, "signal/frontier_ece_reward/group_bin_occupancy": 0.67109375, "signal/frontier_ece_reward/group_std_mean": 0.33760352730751036, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.035323455929756165, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.035323455929756165, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4059325873851776, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.315625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4605302751064301, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0507415734231472, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0507415734231472, "step": 10 }, { "calibration/aurc": 0.616971247492587, "calibration/batch_distribution_entropy": 0.6409075565843491, "calibration/batch_entropy_100bins": 0.4772482403805065, "calibration/batch_entropy_10bins": 0.6409075565843491, "calibration/batch_entropy_50bins": 0.5566394338845917, "calibration/batch_uniqueness": 0.7049275200846712, "calibration/buffer_distribution_entropy": 0.6588770403392903, "calibration/buffer_entropy_100bins": 0.49200675404876176, "calibration/buffer_entropy_10bins": 0.6588770403392903, "calibration/buffer_entropy_50bins": 0.5748770253713922, "calibration/confidence_entropy": 0.34808615842826207, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4785652202870275, "calibration/mean_confidence": 0.8042481872309974, "calibration/prompt_uniqueness": 0.6151282665489992, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 1536.0, "completions/max_terminated_length": 1420.2, "completions/mean_length": 204.5537109375, "completions/mean_terminated_length": 183.505908203125, "completions/min_length": 3.2, "completions/min_terminated_length": 3.2, "epoch": 0.048, "grad_norm": 0.011996953748166561, "learning_rate": 9.374999999999999e-07, "loss": 0.0439, "num_tokens": 52599266.0, "reward": 0.608043098449707, "reward_std": 0.30430689454078674, "rewards/accuracy_reward": 0.274609375, "rewards/brier_reward": 0.4852728068828583, "rewards/format_reward": 0.8759765625, "rewards/frontier_aurc_reward": 0.30132074588909746, "rewards/frontier_coverage_1": 0.3177640035748482, "rewards/frontier_coverage_10": 0.3177640035748482, "rewards/frontier_coverage_15": 0.3177640035748482, "rewards/frontier_coverage_20": 0.3177640035748482, "rewards/frontier_coverage_25": 0.3177640035748482, "rewards/frontier_coverage_5": 0.3177640035748482, "rewards/frontier_ece_reward": 0.2919433981180191, "rewards/frontier_entropy_batch_reward": -0.8313869953155517, "signal/accuracy_reward/centered_abs_mean": 0.2015869140625, "signal/accuracy_reward/group_bin_occupancy": 0.2046875, "signal/accuracy_reward/group_std_mean": 0.2485917925834656, "signal/accuracy_reward/group_zero_std_frac": 0.3625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10079345703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10079345703125, "signal/advantage_abs_mean": 0.2351256161928177, "signal/advantage_pre_scale_abs_mean": 0.2351256161928177, "signal/advantage_pre_scale_std": 0.31978016495704653, "signal/advantage_std": 0.31978016495704653, "signal/brier_reward/centered_abs_mean": 0.2747634917497635, "signal/brier_reward/group_bin_occupancy": 0.794921875, "signal/brier_reward/group_std_mean": 0.330036336183548, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03434543646872044, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.03434543646872044, "signal/format_reward/centered_abs_mean": 0.20308837890625, "signal/format_reward/group_bin_occupancy": 0.24375, "signal/format_reward/group_std_mean": 0.3047700166702271, "signal/format_reward/group_zero_std_frac": 0.05, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.101544189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.101544189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.21934852562844753, "signal/frontier_aurc_reward/group_bin_occupancy": 0.735546875, "signal/frontier_aurc_reward/group_std_mean": 0.2648093054071069, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003926338179735467, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003926338179735467, "signal/frontier_coverage_1/centered_abs_mean": 0.23966625183820725, "signal/frontier_coverage_1/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_1/group_std_mean": 0.29589507579803465, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_10/centered_abs_mean": 0.23966625183820725, "signal/frontier_coverage_10/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_10/group_std_mean": 0.29589507579803465, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_15/centered_abs_mean": 0.23966625183820725, "signal/frontier_coverage_15/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_15/group_std_mean": 0.29589507579803465, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_20/centered_abs_mean": 0.23966625183820725, "signal/frontier_coverage_20/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_20/group_std_mean": 0.29589507579803465, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_25/centered_abs_mean": 0.23966625183820725, "signal/frontier_coverage_25/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_25/group_std_mean": 0.29589507579803465, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_5/centered_abs_mean": 0.23966625183820725, "signal/frontier_coverage_5/group_bin_occupancy": 0.708984375, "signal/frontier_coverage_5/group_std_mean": 0.29589507579803465, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004290025448426604, "signal/frontier_ece_reward/centered_abs_mean": 0.2458495855331421, "signal/frontier_ece_reward/group_bin_occupancy": 0.716796875, "signal/frontier_ece_reward/group_std_mean": 0.29648907482624054, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030731198191642762, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030731198191642762, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2641173452138901, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.340625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3699604392051697, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03301466815173626, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03301466815173626, "step": 15 }, { "calibration/aurc": 0.53447438580555, "calibration/batch_distribution_entropy": 0.697833452595151, "calibration/batch_entropy_100bins": 0.5162966612875538, "calibration/batch_entropy_10bins": 0.697833452595151, "calibration/batch_entropy_50bins": 0.601463977565162, "calibration/batch_uniqueness": 0.7540542644025435, "calibration/buffer_distribution_entropy": 0.6562396115815237, "calibration/buffer_entropy_100bins": 0.49265615739430785, "calibration/buffer_entropy_10bins": 0.6562396115815237, "calibration/buffer_entropy_50bins": 0.5747150103358988, "calibration/confidence_entropy": 0.361725120029885, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3901462729077969, "calibration/mean_confidence": 0.7812206546712639, "calibration/prompt_uniqueness": 0.6691434151673705, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004296875, "completions/max_length": 1536.0, "completions/max_terminated_length": 1216.6, "completions/mean_length": 143.82763671875, "completions/mean_terminated_length": 137.8384002685547, "completions/min_length": 34.0, "completions/min_terminated_length": 34.0, "epoch": 0.064, "grad_norm": 0.0032386924140155315, "learning_rate": 1e-06, "loss": 0.0098, "num_tokens": 68990461.0, "reward": 0.6213819026947022, "reward_std": 0.20088508129119872, "rewards/accuracy_reward": 0.3435546875, "rewards/brier_reward": 0.5725475311279297, "rewards/format_reward": 0.980078125, "rewards/frontier_aurc_reward": -0.007014566846191883, "rewards/frontier_coverage_1": 0.06326824426651001, "rewards/frontier_coverage_10": 0.06326824426651001, "rewards/frontier_coverage_15": 0.06326824426651001, "rewards/frontier_coverage_20": 0.06326824426651001, "rewards/frontier_coverage_25": 0.06326824426651001, "rewards/frontier_coverage_5": 0.06326824426651001, "rewards/frontier_ece_reward": -0.046458789124153556, "rewards/frontier_entropy_batch_reward": -0.9029202818870544, "signal/accuracy_reward/centered_abs_mean": 0.2005615234375, "signal/accuracy_reward/group_bin_occupancy": 0.207421875, "signal/accuracy_reward/group_std_mean": 0.25139918029308317, "signal/accuracy_reward/group_zero_std_frac": 0.340625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10028076171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10028076171875, "signal/advantage_abs_mean": 0.15537169873714446, "signal/advantage_pre_scale_abs_mean": 0.15537169873714446, "signal/advantage_pre_scale_std": 0.21755909621715547, "signal/advantage_std": 0.21755909621715547, "signal/brier_reward/centered_abs_mean": 0.24199655055999755, "signal/brier_reward/group_bin_occupancy": 0.828515625, "signal/brier_reward/group_std_mean": 0.298342889547348, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030249568819999694, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.030249568819999694, "signal/format_reward/centered_abs_mean": 0.03763427734375, "signal/format_reward/group_bin_occupancy": 0.18203125, "signal/format_reward/group_std_mean": 0.0920264482498169, "signal/format_reward/group_zero_std_frac": 0.54375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.018817138671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.018817138671875, "signal/frontier_aurc_reward/centered_abs_mean": 0.005011124256998301, "signal/frontier_aurc_reward/group_bin_occupancy": 0.75546875, "signal/frontier_aurc_reward/group_std_mean": 0.006882566865533591, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.969911868916824e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.969911868916824e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.10851092785596847, "signal/frontier_coverage_1/group_bin_occupancy": 0.6796875, "signal/frontier_coverage_1/group_std_mean": 0.16966440081596373, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_10/centered_abs_mean": 0.10851092785596847, "signal/frontier_coverage_10/group_bin_occupancy": 0.6796875, "signal/frontier_coverage_10/group_std_mean": 0.16966440081596373, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_15/centered_abs_mean": 0.10851092785596847, "signal/frontier_coverage_15/group_bin_occupancy": 0.6796875, "signal/frontier_coverage_15/group_std_mean": 0.16966440081596373, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_20/centered_abs_mean": 0.10851092785596847, "signal/frontier_coverage_20/group_bin_occupancy": 0.6796875, "signal/frontier_coverage_20/group_std_mean": 0.16966440081596373, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_25/centered_abs_mean": 0.10851092785596847, "signal/frontier_coverage_25/group_bin_occupancy": 0.6796875, "signal/frontier_coverage_25/group_std_mean": 0.16966440081596373, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_5/centered_abs_mean": 0.10851092785596847, "signal/frontier_coverage_5/group_bin_occupancy": 0.6796875, "signal/frontier_coverage_5/group_std_mean": 0.16966440081596373, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019423455698415637, "signal/frontier_ece_reward/centered_abs_mean": 0.14095230400562286, "signal/frontier_ece_reward/group_bin_occupancy": 0.76015625, "signal/frontier_ece_reward/group_std_mean": 0.16866419315338135, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.017619038000702858, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.017619038000702858, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16997582614421844, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.358984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2987139880657196, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.071875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021246978268027305, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021246978268027305, "step": 20 }, { "calibration/aurc": 0.6205064937493211, "calibration/batch_distribution_entropy": 0.8188591589225596, "calibration/batch_entropy_100bins": 0.6092270818068604, "calibration/batch_entropy_10bins": 0.8188591589225596, "calibration/batch_entropy_50bins": 0.6969438827371384, "calibration/batch_uniqueness": 0.8361927117817313, "calibration/buffer_distribution_entropy": 0.6856559337312504, "calibration/buffer_entropy_100bins": 0.5146458039959066, "calibration/buffer_entropy_10bins": 0.6856559337312504, "calibration/buffer_entropy_50bins": 0.5978699532757095, "calibration/confidence_entropy": 0.4192333208619353, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.3946832247035178, "calibration/mean_confidence": 0.705443334403485, "calibration/prompt_uniqueness": 0.7620859228603916, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 1378.0, "completions/max_terminated_length": 926.6, "completions/mean_length": 121.5697265625, "completions/mean_terminated_length": 119.77201538085937, "completions/min_length": 28.2, "completions/min_terminated_length": 28.2, "epoch": 0.08, "grad_norm": 0.012476031668484211, "learning_rate": 1e-06, "loss": 0.0025, "num_tokens": 85168487.0, "reward": 0.6544445514678955, "reward_std": 0.18224802613258362, "rewards/accuracy_reward": 0.36328125, "rewards/brier_reward": 0.6255658030509949, "rewards/format_reward": 0.99404296875, "rewards/frontier_aurc_reward": -0.006063262652605772, "rewards/frontier_coverage_1": 0.07632581368088723, "rewards/frontier_coverage_10": 0.07632581368088723, "rewards/frontier_coverage_15": 0.07632581368088723, "rewards/frontier_coverage_20": 0.07632581368088723, "rewards/frontier_coverage_25": 0.07632581368088723, "rewards/frontier_coverage_5": 0.07632581368088723, "rewards/frontier_ece_reward": -0.03705122843384743, "rewards/frontier_entropy_batch_reward": -0.8469658613204956, "signal/accuracy_reward/centered_abs_mean": 0.188232421875, "signal/accuracy_reward/group_bin_occupancy": 0.203125, "signal/accuracy_reward/group_std_mean": 0.2347244828939438, "signal/accuracy_reward/group_zero_std_frac": 0.375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0941162109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0941162109375, "signal/advantage_abs_mean": 0.14390270113945008, "signal/advantage_pre_scale_abs_mean": 0.14390270113945008, "signal/advantage_pre_scale_std": 0.1975090980529785, "signal/advantage_std": 0.1975090980529785, "signal/brier_reward/centered_abs_mean": 0.23567027747631072, "signal/brier_reward/group_bin_occupancy": 0.867578125, "signal/brier_reward/group_std_mean": 0.28802819848060607, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02945878468453884, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02945878468453884, "signal/format_reward/centered_abs_mean": 0.011297607421875, "signal/format_reward/group_bin_occupancy": 0.144921875, "signal/format_reward/group_std_mean": 0.029941194131970404, "signal/format_reward/group_zero_std_frac": 0.840625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0056488037109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0056488037109375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0037031634245067837, "signal/frontier_aurc_reward/group_bin_occupancy": 0.751953125, "signal/frontier_aurc_reward/group_std_mean": 0.005138655751943588, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.628662376897409e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.628662376897409e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1505482792854309, "signal/frontier_coverage_1/group_bin_occupancy": 0.77265625, "signal/frontier_coverage_1/group_std_mean": 0.21810686886310576, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_10/centered_abs_mean": 0.1505482792854309, "signal/frontier_coverage_10/group_bin_occupancy": 0.77265625, "signal/frontier_coverage_10/group_std_mean": 0.21810686886310576, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_15/centered_abs_mean": 0.1505482792854309, "signal/frontier_coverage_15/group_bin_occupancy": 0.77265625, "signal/frontier_coverage_15/group_std_mean": 0.21810686886310576, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_20/centered_abs_mean": 0.1505482792854309, "signal/frontier_coverage_20/group_bin_occupancy": 0.77265625, "signal/frontier_coverage_20/group_std_mean": 0.21810686886310576, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_25/centered_abs_mean": 0.1505482792854309, "signal/frontier_coverage_25/group_bin_occupancy": 0.77265625, "signal/frontier_coverage_25/group_std_mean": 0.21810686886310576, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_5/centered_abs_mean": 0.1505482792854309, "signal/frontier_coverage_5/group_bin_occupancy": 0.77265625, "signal/frontier_coverage_5/group_std_mean": 0.21810686886310576, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002694814093410969, "signal/frontier_ece_reward/centered_abs_mean": 0.1332566112279892, "signal/frontier_ece_reward/group_bin_occupancy": 0.82265625, "signal/frontier_ece_reward/group_std_mean": 0.16948509812355042, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01665707640349865, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01665707640349865, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2531877249479294, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.450390625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3905863881111145, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031648465618491176, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031648465618491176, "step": 25 }, { "calibration/aurc": 0.6337002880818469, "calibration/batch_distribution_entropy": 0.9515234692633457, "calibration/batch_entropy_100bins": 0.7790651280496668, "calibration/batch_entropy_10bins": 0.9515234692633457, "calibration/batch_entropy_50bins": 0.848934936739482, "calibration/batch_uniqueness": 0.909145581071251, "calibration/buffer_distribution_entropy": 0.7460870471656004, "calibration/buffer_entropy_100bins": 0.565529106007607, "calibration/buffer_entropy_10bins": 0.7460870471656004, "calibration/buffer_entropy_50bins": 0.6492569955197386, "calibration/confidence_entropy": 0.49321506704742396, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.29262828029023924, "calibration/mean_confidence": 0.5433459062155098, "calibration/prompt_uniqueness": 0.8469597941687622, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021484375, "completions/max_length": 1536.0, "completions/max_terminated_length": 929.8, "completions/mean_length": 118.1986328125, "completions/mean_terminated_length": 115.14469146728516, "completions/min_length": 38.6, "completions/min_terminated_length": 38.6, "epoch": 0.096, "grad_norm": 0.0032025109976530075, "learning_rate": 1e-06, "loss": 0.0048, "num_tokens": 101423449.0, "reward": 0.6895796895027161, "reward_std": 0.17854192554950715, "rewards/accuracy_reward": 0.35771484375, "rewards/brier_reward": 0.6795345783233643, "rewards/format_reward": 0.99248046875, "rewards/frontier_aurc_reward": -0.005398123059421778, "rewards/frontier_coverage_1": 0.10967106521129608, "rewards/frontier_coverage_10": 0.10967106521129608, "rewards/frontier_coverage_15": 0.10967106521129608, "rewards/frontier_coverage_20": 0.10967106521129608, "rewards/frontier_coverage_25": 0.10967106521129608, "rewards/frontier_coverage_5": 0.10967106521129608, "rewards/frontier_ece_reward": -0.026541496440768243, "rewards/frontier_entropy_batch_reward": -0.6305931687355042, "signal/accuracy_reward/centered_abs_mean": 0.189324951171875, "signal/accuracy_reward/group_bin_occupancy": 0.20390625, "signal/accuracy_reward/group_std_mean": 0.2379360795021057, "signal/accuracy_reward/group_zero_std_frac": 0.36875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0946624755859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0946624755859375, "signal/advantage_abs_mean": 0.1388890862464905, "signal/advantage_pre_scale_abs_mean": 0.1388890862464905, "signal/advantage_pre_scale_std": 0.1868252784013748, "signal/advantage_std": 0.1868252784013748, "signal/brier_reward/centered_abs_mean": 0.2370523989200592, "signal/brier_reward/group_bin_occupancy": 0.90390625, "signal/brier_reward/group_std_mean": 0.288687926530838, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0296315498650074, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.0296315498650074, "signal/format_reward/centered_abs_mean": 0.014483642578125, "signal/format_reward/group_bin_occupancy": 0.15234375, "signal/format_reward/group_std_mean": 0.04018273241817951, "signal/format_reward/group_zero_std_frac": 0.78125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0072418212890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0072418212890625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002838827669620514, "signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125, "signal/frontier_aurc_reward/group_std_mean": 0.004378228541463613, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0815014401450756e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0815014401450756e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.23764651417732238, "signal/frontier_coverage_1/group_bin_occupancy": 0.908984375, "signal/frontier_coverage_1/group_std_mean": 0.3083926856517792, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_10/centered_abs_mean": 0.23764651417732238, "signal/frontier_coverage_10/group_bin_occupancy": 0.908984375, "signal/frontier_coverage_10/group_std_mean": 0.3083926856517792, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_15/centered_abs_mean": 0.23764651417732238, "signal/frontier_coverage_15/group_bin_occupancy": 0.908984375, "signal/frontier_coverage_15/group_std_mean": 0.3083926856517792, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_20/centered_abs_mean": 0.23764651417732238, "signal/frontier_coverage_20/group_bin_occupancy": 0.908984375, "signal/frontier_coverage_20/group_std_mean": 0.3083926856517792, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_25/centered_abs_mean": 0.23764651417732238, "signal/frontier_coverage_25/group_bin_occupancy": 0.908984375, "signal/frontier_coverage_25/group_std_mean": 0.3083926856517792, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_5/centered_abs_mean": 0.23764651417732238, "signal/frontier_coverage_5/group_bin_occupancy": 0.908984375, "signal/frontier_coverage_5/group_std_mean": 0.3083926856517792, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004253872437402606, "signal/frontier_ece_reward/centered_abs_mean": 0.11303882747888565, "signal/frontier_ece_reward/group_bin_occupancy": 0.83671875, "signal/frontier_ece_reward/group_std_mean": 0.15580815970897674, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014129853434860706, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014129853434860706, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4395421028137207, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.62890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5271062850952148, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.054942762851715087, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.054942762851715087, "step": 30 }, { "calibration/aurc": 0.511020845461892, "calibration/batch_distribution_entropy": 0.9269066170510761, "calibration/batch_entropy_100bins": 0.9245069542553294, "calibration/batch_entropy_10bins": 0.9269066170510761, "calibration/batch_entropy_50bins": 0.9373367432265832, "calibration/batch_uniqueness": 0.9446476489301399, "calibration/buffer_distribution_entropy": 0.829412139862835, "calibration/buffer_entropy_100bins": 0.6628959346066731, "calibration/buffer_entropy_10bins": 0.829412139862835, "calibration/buffer_entropy_50bins": 0.7391661237028551, "calibration/confidence_entropy": 0.5010959880267044, "calibration/coverage@0%": 0.004707438321562174, "calibration/coverage@1%": 0.004707438321562174, "calibration/coverage@10%": 0.004707438321562174, "calibration/coverage@15%": 0.005100365630010111, "calibration/coverage@20%": 0.00745639160473512, "calibration/coverage@25%": 0.01687046764772599, "calibration/coverage@30%": 0.0360861539222358, "calibration/coverage@5%": 0.004707438321562174, "calibration/ece": 0.19524205952621834, "calibration/mean_confidence": 0.3591607106056175, "calibration/prompt_uniqueness": 0.8846453539155539, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00185546875, "completions/max_length": 1536.0, "completions/max_terminated_length": 867.0, "completions/mean_length": 117.60078125, "completions/mean_terminated_length": 114.96354064941406, "completions/min_length": 42.0, "completions/min_terminated_length": 42.0, "epoch": 0.112, "grad_norm": 0.002222000854089856, "learning_rate": 1e-06, "loss": 0.0057, "num_tokens": 117737153.0, "reward": 0.7422285556793213, "reward_std": 0.14107392430305482, "rewards/accuracy_reward": 0.387109375, "rewards/brier_reward": 0.7211146593093872, "rewards/format_reward": 0.99482421875, "rewards/frontier_aurc_reward": -0.0045266709290444854, "rewards/frontier_coverage_1": 0.13778235018253326, "rewards/frontier_coverage_10": 0.13778235018253326, "rewards/frontier_coverage_15": 0.13778235018253326, "rewards/frontier_coverage_20": 0.13778235018253326, "rewards/frontier_coverage_25": 0.13778235018253326, "rewards/frontier_coverage_5": 0.13778235018253326, "rewards/frontier_ece_reward": -0.0034593752585351466, "rewards/frontier_entropy_batch_reward": -0.42529548406600953, "signal/accuracy_reward/centered_abs_mean": 0.190576171875, "signal/accuracy_reward/group_bin_occupancy": 0.20546875, "signal/accuracy_reward/group_std_mean": 0.24057506322860717, "signal/accuracy_reward/group_zero_std_frac": 0.35625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0952880859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0952880859375, "signal/advantage_abs_mean": 0.10944210141897201, "signal/advantage_pre_scale_abs_mean": 0.10944210141897201, "signal/advantage_pre_scale_std": 0.1517003059387207, "signal/advantage_std": 0.1517003059387207, "signal/brier_reward/centered_abs_mean": 0.20343652367591858, "signal/brier_reward/group_bin_occupancy": 0.880078125, "signal/brier_reward/group_std_mean": 0.2546698063611984, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025429565459489822, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.025429565459489822, "signal/format_reward/centered_abs_mean": 0.009954833984375, "signal/format_reward/group_bin_occupancy": 0.14375, "signal/format_reward/group_std_mean": 0.027537884190678596, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0049774169921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0049774169921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0012978114187717437, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72578125, "signal/frontier_aurc_reward/group_std_mean": 0.0021451528184115885, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3230824081110767e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3230824081110767e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.317331862449646, "signal/frontier_coverage_1/group_bin_occupancy": 0.941796875, "signal/frontier_coverage_1/group_std_mean": 0.3929042756557465, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_10/centered_abs_mean": 0.317331862449646, "signal/frontier_coverage_10/group_bin_occupancy": 0.941796875, "signal/frontier_coverage_10/group_std_mean": 0.3929042756557465, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_15/centered_abs_mean": 0.317331862449646, "signal/frontier_coverage_15/group_bin_occupancy": 0.941796875, "signal/frontier_coverage_15/group_std_mean": 0.3929042756557465, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_20/centered_abs_mean": 0.317331862449646, "signal/frontier_coverage_20/group_bin_occupancy": 0.941796875, "signal/frontier_coverage_20/group_std_mean": 0.3929042756557465, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_25/centered_abs_mean": 0.317331862449646, "signal/frontier_coverage_25/group_bin_occupancy": 0.941796875, "signal/frontier_coverage_25/group_std_mean": 0.3929042756557465, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_5/centered_abs_mean": 0.317331862449646, "signal/frontier_coverage_5/group_bin_occupancy": 0.941796875, "signal/frontier_coverage_5/group_std_mean": 0.3929042756557465, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005680239945650101, "signal/frontier_ece_reward/centered_abs_mean": 0.05593574643135071, "signal/frontier_ece_reward/group_bin_occupancy": 0.801953125, "signal/frontier_ece_reward/group_std_mean": 0.08778993785381317, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006991968303918838, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006991968303918838, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.448394775390625, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5047555208206177, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.056049346923828125, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.056049346923828125, "step": 35 }, { "calibration/aurc": 0.5883847168275128, "calibration/batch_distribution_entropy": 0.91090173474976, "calibration/batch_entropy_100bins": 0.931093997601525, "calibration/batch_entropy_10bins": 0.91090173474976, "calibration/batch_entropy_50bins": 0.9358617585488869, "calibration/batch_uniqueness": 0.9426319952686615, "calibration/buffer_distribution_entropy": 0.893493430028969, "calibration/buffer_entropy_100bins": 0.7499790530020946, "calibration/buffer_entropy_10bins": 0.893493430028969, "calibration/buffer_entropy_50bins": 0.8144149519247129, "calibration/confidence_entropy": 0.5064471270952413, "calibration/coverage@0%": 0.002359110808594898, "calibration/coverage@1%": 0.002359110808594898, "calibration/coverage@10%": 0.002359110808594898, "calibration/coverage@15%": 0.002359110808594898, "calibration/coverage@20%": 0.005897777021487245, "calibration/coverage@25%": 0.005897777021487245, "calibration/coverage@30%": 0.006290704329935182, "calibration/coverage@5%": 0.002359110808594898, "calibration/ece": 0.19177551118694733, "calibration/mean_confidence": 0.3377996746346041, "calibration/prompt_uniqueness": 0.8836925240634755, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00283203125, "completions/max_length": 1536.0, "completions/max_terminated_length": 896.0, "completions/mean_length": 122.0451171875, "completions/mean_terminated_length": 118.02766723632813, "completions/min_length": 40.2, "completions/min_terminated_length": 40.2, "epoch": 0.128, "grad_norm": 0.0016771440859884024, "learning_rate": 1e-06, "loss": 0.0077, "num_tokens": 133903567.0, "reward": 0.7402719259262085, "reward_std": 0.13263684809207915, "rewards/accuracy_reward": 0.380078125, "rewards/brier_reward": 0.7196099877357482, "rewards/format_reward": 0.9953125, "rewards/frontier_aurc_reward": -0.0044641831889748575, "rewards/frontier_coverage_1": 0.1357348829507828, "rewards/frontier_coverage_10": 0.1357348829507828, "rewards/frontier_coverage_15": 0.1357348829507828, "rewards/frontier_coverage_20": 0.1357348829507828, "rewards/frontier_coverage_25": 0.1357348829507828, "rewards/frontier_coverage_5": 0.1357348829507828, "rewards/frontier_ece_reward": 0.0004683260805904865, "rewards/frontier_entropy_batch_reward": -0.4154496967792511, "signal/accuracy_reward/centered_abs_mean": 0.174169921875, "signal/accuracy_reward/group_bin_occupancy": 0.203515625, "signal/accuracy_reward/group_std_mean": 0.2256518006324768, "signal/accuracy_reward/group_zero_std_frac": 0.371875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0870849609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0870849609375, "signal/advantage_abs_mean": 0.10101482570171356, "signal/advantage_pre_scale_abs_mean": 0.10101482570171356, "signal/advantage_pre_scale_std": 0.1429665595293045, "signal/advantage_std": 0.1429665595293045, "signal/brier_reward/centered_abs_mean": 0.19600196480751036, "signal/brier_reward/group_bin_occupancy": 0.88515625, "signal/brier_reward/group_std_mean": 0.24760811030864716, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024500245600938796, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024500245600938796, "signal/format_reward/centered_abs_mean": 0.00906982421875, "signal/format_reward/group_bin_occupancy": 0.143359375, "signal/format_reward/group_std_mean": 0.026180195435881615, "signal/format_reward/group_zero_std_frac": 0.853125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004534912109375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004534912109375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0013354318216443062, "signal/frontier_aurc_reward/group_bin_occupancy": 0.773828125, "signal/frontier_aurc_reward/group_std_mean": 0.0020624040393158794, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3904228874016555e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3904228874016555e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.3095270454883575, "signal/frontier_coverage_1/group_bin_occupancy": 0.940234375, "signal/frontier_coverage_1/group_std_mean": 0.3819129645824432, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_10/centered_abs_mean": 0.3095270454883575, "signal/frontier_coverage_10/group_bin_occupancy": 0.940234375, "signal/frontier_coverage_10/group_std_mean": 0.3819129645824432, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_15/centered_abs_mean": 0.3095270454883575, "signal/frontier_coverage_15/group_bin_occupancy": 0.940234375, "signal/frontier_coverage_15/group_std_mean": 0.3819129645824432, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_20/centered_abs_mean": 0.3095270454883575, "signal/frontier_coverage_20/group_bin_occupancy": 0.940234375, "signal/frontier_coverage_20/group_std_mean": 0.3819129645824432, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_25/centered_abs_mean": 0.3095270454883575, "signal/frontier_coverage_25/group_bin_occupancy": 0.940234375, "signal/frontier_coverage_25/group_std_mean": 0.3819129645824432, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_5/centered_abs_mean": 0.3095270454883575, "signal/frontier_coverage_5/group_bin_occupancy": 0.940234375, "signal/frontier_coverage_5/group_std_mean": 0.3819129645824432, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005540534015744924, "signal/frontier_ece_reward/centered_abs_mean": 0.048538880050182344, "signal/frontier_ece_reward/group_bin_occupancy": 0.7890625, "signal/frontier_ece_reward/group_std_mean": 0.07725905627012253, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006067360006272793, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006067360006272793, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4226137399673462, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.77734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4835656762123108, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.052826717495918274, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.052826717495918274, "step": 40 }, { "calibration/aurc": 0.4476721577377777, "calibration/batch_distribution_entropy": 0.9775319646696126, "calibration/batch_entropy_100bins": 0.965599725401858, "calibration/batch_entropy_10bins": 0.9775319646696126, "calibration/batch_entropy_50bins": 0.9741778975790041, "calibration/batch_uniqueness": 0.9526775737262507, "calibration/buffer_distribution_entropy": 0.9258372964444268, "calibration/buffer_entropy_100bins": 0.8048134162077932, "calibration/buffer_entropy_10bins": 0.9258372964444268, "calibration/buffer_entropy_50bins": 0.8588073303858943, "calibration/confidence_entropy": 0.5410896147965197, "calibration/coverage@0%": 0.004303763885691263, "calibration/coverage@1%": 0.004303763885691263, "calibration/coverage@10%": 0.004303763885691263, "calibration/coverage@15%": 0.010163138885691262, "calibration/coverage@20%": 0.01758654275066191, "calibration/coverage@25%": 0.12735369961340698, "calibration/coverage@30%": 0.20470204520164229, "calibration/coverage@5%": 0.004303763885691263, "calibration/ece": 0.21352444508406468, "calibration/mean_confidence": 0.48576702979342884, "calibration/prompt_uniqueness": 0.8955215311199272, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00244140625, "completions/max_length": 1536.0, "completions/max_terminated_length": 825.0, "completions/mean_length": 122.3205078125, "completions/mean_terminated_length": 118.86110992431641, "completions/min_length": 46.8, "completions/min_terminated_length": 46.8, "epoch": 0.144, "grad_norm": 0.001746510504744947, "learning_rate": 1e-06, "loss": 0.0087, "num_tokens": 150106561.0, "reward": 0.8010232448577881, "reward_std": 0.1486440747976303, "rewards/accuracy_reward": 0.4798828125, "rewards/brier_reward": 0.7014939427375794, "rewards/format_reward": 0.9962890625, "rewards/frontier_aurc_reward": -0.004155356530100107, "rewards/frontier_coverage_1": 0.03323503416031599, "rewards/frontier_coverage_10": 0.03323503416031599, "rewards/frontier_coverage_15": 0.03323503416031599, "rewards/frontier_coverage_20": 0.03323503416031599, "rewards/frontier_coverage_25": 0.03323503416031599, "rewards/frontier_coverage_5": 0.03323503416031599, "rewards/frontier_ece_reward": 0.0009955904446542264, "rewards/frontier_entropy_batch_reward": -0.22695176005363465, "signal/accuracy_reward/centered_abs_mean": 0.17799072265625, "signal/accuracy_reward/group_bin_occupancy": 0.204296875, "signal/accuracy_reward/group_std_mean": 0.22953784465789795, "signal/accuracy_reward/group_zero_std_frac": 0.365625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.088995361328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.088995361328125, "signal/advantage_abs_mean": 0.11683495044708252, "signal/advantage_pre_scale_abs_mean": 0.11683495044708252, "signal/advantage_pre_scale_std": 0.1583361119031906, "signal/advantage_std": 0.1583361119031906, "signal/brier_reward/centered_abs_mean": 0.2091756820678711, "signal/brier_reward/group_bin_occupancy": 0.9421875, "signal/brier_reward/group_std_mean": 0.2564647078514099, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026146960258483887, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.026146960258483887, "signal/format_reward/centered_abs_mean": 0.00714111328125, "signal/format_reward/group_bin_occupancy": 0.13828125, "signal/format_reward/group_std_mean": 0.01964699849486351, "signal/format_reward/group_zero_std_frac": 0.89375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003570556640625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003570556640625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002061827527359128, "signal/frontier_aurc_reward/group_bin_occupancy": 0.815234375, "signal/frontier_aurc_reward/group_std_mean": 0.0029503189492970706, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6906712193740535e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6906712193740535e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2580311119556427, "signal/frontier_coverage_1/group_bin_occupancy": 0.945703125, "signal/frontier_coverage_1/group_std_mean": 0.32445969581604006, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_10/centered_abs_mean": 0.2580311119556427, "signal/frontier_coverage_10/group_bin_occupancy": 0.945703125, "signal/frontier_coverage_10/group_std_mean": 0.32445969581604006, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_15/centered_abs_mean": 0.2580311119556427, "signal/frontier_coverage_15/group_bin_occupancy": 0.945703125, "signal/frontier_coverage_15/group_std_mean": 0.32445969581604006, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_20/centered_abs_mean": 0.2580311119556427, "signal/frontier_coverage_20/group_bin_occupancy": 0.945703125, "signal/frontier_coverage_20/group_std_mean": 0.32445969581604006, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_25/centered_abs_mean": 0.2580311119556427, "signal/frontier_coverage_25/group_bin_occupancy": 0.945703125, "signal/frontier_coverage_25/group_std_mean": 0.32445969581604006, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_5/centered_abs_mean": 0.2580311119556427, "signal/frontier_coverage_5/group_bin_occupancy": 0.945703125, "signal/frontier_coverage_5/group_std_mean": 0.32445969581604006, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004618756845593453, "signal/frontier_ece_reward/centered_abs_mean": 0.06224460154771805, "signal/frontier_ece_reward/group_bin_occupancy": 0.85703125, "signal/frontier_ece_reward/group_std_mean": 0.0892532080411911, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007780575193464756, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007780575193464756, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31719207763671875, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39413705468177795, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.039649009704589844, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.039649009704589844, "step": 45 }, { "calibration/aurc": 0.5193675277297729, "calibration/batch_distribution_entropy": 0.983820122323354, "calibration/batch_entropy_100bins": 0.9716850235861294, "calibration/batch_entropy_10bins": 0.983820122323354, "calibration/batch_entropy_50bins": 0.9798083767768972, "calibration/batch_uniqueness": 0.9539231310533063, "calibration/buffer_distribution_entropy": 0.9415060100334243, "calibration/buffer_entropy_100bins": 0.8440598741793546, "calibration/buffer_entropy_10bins": 0.9415060100334243, "calibration/buffer_entropy_50bins": 0.888877610865622, "calibration/confidence_entropy": 0.5284500641790459, "calibration/coverage@0%": 0.002352179777096941, "calibration/coverage@1%": 0.002352179777096941, "calibration/coverage@10%": 0.002352179777096941, "calibration/coverage@15%": 0.002352179777096941, "calibration/coverage@20%": 0.003134958642067587, "calibration/coverage@25%": 0.004700516372008879, "calibration/coverage@30%": 0.005483295236979524, "calibration/coverage@5%": 0.002352179777096941, "calibration/ece": 0.21469450707488327, "calibration/mean_confidence": 0.5467065595076134, "calibration/prompt_uniqueness": 0.8933109723215835, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00185546875, "completions/max_length": 1536.0, "completions/max_terminated_length": 966.0, "completions/mean_length": 125.73544921875, "completions/mean_terminated_length": 123.11231536865235, "completions/min_length": 50.6, "completions/min_terminated_length": 50.6, "epoch": 0.16, "grad_norm": 0.0022161127999424934, "learning_rate": 1e-06, "loss": 0.0047, "num_tokens": 166415020.0, "reward": 0.7875006437301636, "reward_std": 0.15233681201934815, "rewards/accuracy_reward": 0.44111328125, "rewards/brier_reward": 0.6943708896636963, "rewards/format_reward": 0.9970703125, "rewards/frontier_aurc_reward": -0.004590437188744545, "rewards/frontier_coverage_1": 0.05425913706421852, "rewards/frontier_coverage_10": 0.05425913706421852, "rewards/frontier_coverage_15": 0.05425913706421852, "rewards/frontier_coverage_20": 0.05425913706421852, "rewards/frontier_coverage_25": 0.05425913706421852, "rewards/frontier_coverage_5": 0.05425913706421852, "rewards/frontier_ece_reward": -0.0013219955493696035, "rewards/frontier_entropy_batch_reward": -0.19174024760723113, "signal/accuracy_reward/centered_abs_mean": 0.169244384765625, "signal/accuracy_reward/group_bin_occupancy": 0.196484375, "signal/accuracy_reward/group_std_mean": 0.2129174590110779, "signal/accuracy_reward/group_zero_std_frac": 0.428125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0846221923828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0846221923828125, "signal/advantage_abs_mean": 0.122350013256073, "signal/advantage_pre_scale_abs_mean": 0.122350013256073, "signal/advantage_pre_scale_std": 0.16442006826400757, "signal/advantage_std": 0.16442006826400757, "signal/brier_reward/centered_abs_mean": 0.21847104728221894, "signal/brier_reward/group_bin_occupancy": 0.945703125, "signal/brier_reward/group_std_mean": 0.26607994437217714, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027308880910277368, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.027308880910277368, "signal/format_reward/centered_abs_mean": 0.00565185546875, "signal/format_reward/group_bin_occupancy": 0.1359375, "signal/format_reward/group_std_mean": 0.01590019799768925, "signal/format_reward/group_zero_std_frac": 0.9125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002825927734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002825927734375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002719328412786126, "signal/frontier_aurc_reward/group_bin_occupancy": 0.8203125, "signal/frontier_aurc_reward/group_std_mean": 0.0038117329590022565, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.867597672273405e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.867597672273405e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22449846267700196, "signal/frontier_coverage_1/group_bin_occupancy": 0.924609375, "signal/frontier_coverage_1/group_std_mean": 0.29227436184883115, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_10/centered_abs_mean": 0.22449846267700196, "signal/frontier_coverage_10/group_bin_occupancy": 0.924609375, "signal/frontier_coverage_10/group_std_mean": 0.29227436184883115, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_15/centered_abs_mean": 0.22449846267700196, "signal/frontier_coverage_15/group_bin_occupancy": 0.924609375, "signal/frontier_coverage_15/group_std_mean": 0.29227436184883115, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_20/centered_abs_mean": 0.22449846267700196, "signal/frontier_coverage_20/group_bin_occupancy": 0.924609375, "signal/frontier_coverage_20/group_std_mean": 0.29227436184883115, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_25/centered_abs_mean": 0.22449846267700196, "signal/frontier_coverage_25/group_bin_occupancy": 0.924609375, "signal/frontier_coverage_25/group_std_mean": 0.29227436184883115, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_5/centered_abs_mean": 0.22449846267700196, "signal/frontier_coverage_5/group_bin_occupancy": 0.924609375, "signal/frontier_coverage_5/group_std_mean": 0.29227436184883115, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004018522240221501, "signal/frontier_ece_reward/centered_abs_mean": 0.07301094681024552, "signal/frontier_ece_reward/group_bin_occupancy": 0.905078125, "signal/frontier_ece_reward/group_std_mean": 0.09825690239667892, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00912636835128069, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00912636835128069, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2845084547996521, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.758203125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3657692790031433, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03556355684995651, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03556355684995651, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.6180015396742866, "eval_calibration/batch_distribution_entropy": 0.9232840224430083, "eval_calibration/batch_entropy_100bins": 0.719649833384205, "eval_calibration/batch_entropy_10bins": 0.9232840224430083, "eval_calibration/batch_entropy_50bins": 0.8035427782506473, "eval_calibration/batch_uniqueness": 0.9052734375, "eval_calibration/buffer_distribution_entropy": 0.9484987743799115, "eval_calibration/buffer_entropy_100bins": 0.8625570001373941, "eval_calibration/buffer_entropy_10bins": 0.9484987743799115, "eval_calibration/buffer_entropy_50bins": 0.9029135065796394, "eval_calibration/confidence_entropy": 0.5333216449567841, "eval_calibration/coverage@0%": 0.0, "eval_calibration/coverage@1%": 0.0, "eval_calibration/coverage@10%": 0.0, "eval_calibration/coverage@15%": 0.0, "eval_calibration/coverage@20%": 0.046875, "eval_calibration/coverage@25%": 0.046875, "eval_calibration/coverage@30%": 0.140625, "eval_calibration/coverage@5%": 0.0, "eval_calibration/ece": 0.330758367790426, "eval_calibration/mean_confidence": 0.5541032170362717, "eval_calibration/prompt_uniqueness": 0.9052734375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 409.75, "eval_completions/max_terminated_length": 409.75, "eval_completions/mean_length": 132.97144317626953, "eval_completions/mean_terminated_length": 132.97144317626953, "eval_completions/min_length": 66.0, "eval_completions/min_terminated_length": 66.0, "eval_loss": 0.0, "eval_num_tokens": 166415020.0, "eval_reward": 0.6569966375827789, "eval_reward_std": 0.23399890586733818, "eval_rewards/accuracy_reward": 0.365234375, "eval_rewards/brier_reward": 0.703216090798378, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004859182401560247, "eval_rewards/frontier_coverage_1": 0.10959587432444096, "eval_rewards/frontier_coverage_10": 0.10959587432444096, "eval_rewards/frontier_coverage_15": 0.10959587432444096, "eval_rewards/frontier_coverage_20": 0.10959587432444096, "eval_rewards/frontier_coverage_25": 0.10959587432444096, "eval_rewards/frontier_coverage_5": 0.10959587432444096, "eval_rewards/frontier_ece_reward": -0.001649250101763755, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 20.2387, "eval_samples_per_second": 24.705, "eval_signal/accuracy_reward/centered_abs_mean": 0.4493408203125, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.48055653274059296, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22467041015625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22467041015625, "eval_signal/advantage_abs_mean": 0.20993919670581818, "eval_signal/advantage_pre_scale_abs_mean": 0.20993919670581818, "eval_signal/advantage_pre_scale_std": 0.23163216933608055, "eval_signal/advantage_std": 0.23163216933608055, "eval_signal/brier_reward/centered_abs_mean": 0.21175387874245644, "eval_signal/brier_reward/group_bin_occupancy": 0.96875, "eval_signal/brier_reward/group_std_mean": 0.2560478299856186, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026469234842807055, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.026469234842807055, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003475597535725683, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.921875, "eval_signal/frontier_aurc_reward/group_std_mean": 0.004670257214456797, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.22131901764078e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.22131901764078e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.28018152713775635, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_1/group_std_mean": 0.36891133338212967, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.28018152713775635, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_10/group_std_mean": 0.36891133338212967, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.28018152713775635, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_15/group_std_mean": 0.36891133338212967, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.28018152713775635, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_20/group_std_mean": 0.36891133338212967, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.28018152713775635, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_25/group_std_mean": 0.36891133338212967, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.28018152713775635, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_5/group_std_mean": 0.36891133338212967, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005015249014832079, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.0727236233651638, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.875, "eval_signal/frontier_ece_reward/group_std_mean": 0.10696529969573021, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009090452920645475, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009090452920645475, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.198, "step": 50 }, { "calibration/aurc": 0.49154793221632376, "calibration/batch_distribution_entropy": 0.9923982265099717, "calibration/batch_entropy_100bins": 0.976761110553309, "calibration/batch_entropy_10bins": 0.9923982265099717, "calibration/batch_entropy_50bins": 0.9853177330835299, "calibration/batch_uniqueness": 0.9559410835975026, "calibration/buffer_distribution_entropy": 0.9529318045546697, "calibration/buffer_entropy_100bins": 0.8728419282733055, "calibration/buffer_entropy_10bins": 0.9529318045546697, "calibration/buffer_entropy_50bins": 0.9107275009636199, "calibration/confidence_entropy": 0.5137282393272424, "calibration/coverage@0%": 0.003908543297455968, "calibration/coverage@1%": 0.003908543297455968, "calibration/coverage@10%": 0.003908543297455968, "calibration/coverage@15%": 0.0050804182974559685, "calibration/coverage@20%": 0.009769447162426614, "calibration/coverage@25%": 0.010550697162426615, "calibration/coverage@30%": 0.021499663649706457, "calibration/coverage@5%": 0.003908543297455968, "calibration/ece": 0.21824396695551201, "calibration/mean_confidence": 0.4844792598613541, "calibration/prompt_uniqueness": 0.8977543635373308, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 1536.0, "completions/max_terminated_length": 887.8, "completions/mean_length": 135.89765625, "completions/mean_terminated_length": 134.25655059814454, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.176, "grad_norm": 0.001891042571514845, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 183043732.0, "reward": 0.7944678544998169, "reward_std": 0.1392228126525879, "rewards/accuracy_reward": 0.44111328125, "rewards/brier_reward": 0.6948043942451477, "rewards/format_reward": 0.9982421875, "rewards/frontier_aurc_reward": -0.004382272064685821, "rewards/frontier_coverage_1": 0.06402078047394752, "rewards/frontier_coverage_10": 0.06402078047394752, "rewards/frontier_coverage_15": 0.06402078047394752, "rewards/frontier_coverage_20": 0.06402078047394752, "rewards/frontier_coverage_25": 0.06402078047394752, "rewards/frontier_coverage_5": 0.06402078047394752, "rewards/frontier_ece_reward": 0.0002359504927881062, "rewards/frontier_entropy_batch_reward": -0.15109863579273225, "signal/accuracy_reward/centered_abs_mean": 0.155535888671875, "signal/accuracy_reward/group_bin_occupancy": 0.19375, "signal/accuracy_reward/group_std_mean": 0.20017340481281282, "signal/accuracy_reward/group_zero_std_frac": 0.45, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0777679443359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0777679443359375, "signal/advantage_abs_mean": 0.11083731651306153, "signal/advantage_pre_scale_abs_mean": 0.11083731651306153, "signal/advantage_pre_scale_std": 0.14989208579063415, "signal/advantage_std": 0.14989208579063415, "signal/brier_reward/centered_abs_mean": 0.22810422778129577, "signal/brier_reward/group_bin_occupancy": 0.94296875, "signal/brier_reward/group_std_mean": 0.27661994099617004, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02851302847266197, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02851302847266197, "signal/format_reward/centered_abs_mean": 0.0033935546875, "signal/format_reward/group_bin_occupancy": 0.131640625, "signal/format_reward/group_std_mean": 0.009607380395755172, "signal/format_reward/group_zero_std_frac": 0.946875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00169677734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00169677734375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026019237469881774, "signal/frontier_aurc_reward/group_bin_occupancy": 0.793359375, "signal/frontier_aurc_reward/group_std_mean": 0.0037212247960269453, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.657443350879476e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.657443350879476e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.26558775305747984, "signal/frontier_coverage_1/group_bin_occupancy": 0.934375, "signal/frontier_coverage_1/group_std_mean": 0.3330970585346222, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_10/centered_abs_mean": 0.26558775305747984, "signal/frontier_coverage_10/group_bin_occupancy": 0.934375, "signal/frontier_coverage_10/group_std_mean": 0.3330970585346222, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_15/centered_abs_mean": 0.26558775305747984, "signal/frontier_coverage_15/group_bin_occupancy": 0.934375, "signal/frontier_coverage_15/group_std_mean": 0.3330970585346222, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_20/centered_abs_mean": 0.26558775305747984, "signal/frontier_coverage_20/group_bin_occupancy": 0.934375, "signal/frontier_coverage_20/group_std_mean": 0.3330970585346222, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_25/centered_abs_mean": 0.26558775305747984, "signal/frontier_coverage_25/group_bin_occupancy": 0.934375, "signal/frontier_coverage_25/group_std_mean": 0.3330970585346222, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_5/centered_abs_mean": 0.26558775305747984, "signal/frontier_coverage_5/group_bin_occupancy": 0.934375, "signal/frontier_coverage_5/group_std_mean": 0.3330970585346222, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004754020553082228, "signal/frontier_ece_reward/centered_abs_mean": 0.06770721971988677, "signal/frontier_ece_reward/group_bin_occupancy": 0.89609375, "signal/frontier_ece_reward/group_std_mean": 0.09261107891798019, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008463402464985847, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008463402464985847, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2386895924806595, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.756640625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32064216732978823, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029836199060082436, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029836199060082436, "step": 55 }, { "calibration/aurc": 0.4491234760798785, "calibration/batch_distribution_entropy": 0.98299611256218, "calibration/batch_entropy_100bins": 0.9689459410888717, "calibration/batch_entropy_10bins": 0.98299611256218, "calibration/batch_entropy_50bins": 0.9776301185799596, "calibration/batch_uniqueness": 0.9538074206065245, "calibration/buffer_distribution_entropy": 0.9623449924153729, "calibration/buffer_entropy_100bins": 0.8922897436238332, "calibration/buffer_entropy_10bins": 0.9623449924153729, "calibration/buffer_entropy_50bins": 0.9257487085635707, "calibration/confidence_entropy": 0.49867141502553186, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.005859375, "calibration/coverage@15%": 0.009765625, "calibration/coverage@20%": 0.0125, "calibration/coverage@25%": 0.021875, "calibration/coverage@30%": 0.0421875, "calibration/coverage@5%": 0.005078125, "calibration/ece": 0.18586777873207352, "calibration/mean_confidence": 0.4387145557850641, "calibration/prompt_uniqueness": 0.891550752796566, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 994.2, "completions/max_terminated_length": 709.2, "completions/mean_length": 144.47421875, "completions/mean_terminated_length": 143.9313934326172, "completions/min_length": 58.0, "completions/min_terminated_length": 58.0, "epoch": 0.192, "grad_norm": 0.0014471631729975343, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 199337964.0, "reward": 0.8112979173660279, "reward_std": 0.1323389947414398, "rewards/accuracy_reward": 0.47412109375, "rewards/brier_reward": 0.7108014822006226, "rewards/format_reward": 0.99892578125, "rewards/frontier_aurc_reward": -0.003819770412519574, "rewards/frontier_coverage_1": 0.06823012800887227, "rewards/frontier_coverage_10": 0.06823012800887227, "rewards/frontier_coverage_15": 0.06823012800887227, "rewards/frontier_coverage_20": 0.06823012800887227, "rewards/frontier_coverage_25": 0.06823012800887227, "rewards/frontier_coverage_5": 0.06823012800887227, "rewards/frontier_ece_reward": 0.008699505100958049, "rewards/frontier_entropy_batch_reward": -0.17938159108161927, "signal/accuracy_reward/centered_abs_mean": 0.148931884765625, "signal/accuracy_reward/group_bin_occupancy": 0.193359375, "signal/accuracy_reward/group_std_mean": 0.19490036964416504, "signal/accuracy_reward/group_zero_std_frac": 0.453125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0744659423828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0744659423828125, "signal/advantage_abs_mean": 0.10477420687675476, "signal/advantage_pre_scale_abs_mean": 0.10477420687675476, "signal/advantage_pre_scale_std": 0.14257683753967285, "signal/advantage_std": 0.14257683753967285, "signal/brier_reward/centered_abs_mean": 0.22218222618103028, "signal/brier_reward/group_bin_occupancy": 0.919140625, "signal/brier_reward/group_std_mean": 0.27116515636444094, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027772778272628786, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.027772778272628786, "signal/format_reward/centered_abs_mean": 0.002081298828125, "signal/format_reward/group_bin_occupancy": 0.129296875, "signal/format_reward/group_std_mean": 0.006076698703691363, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002302809851244092, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7859375, "signal/frontier_aurc_reward/group_std_mean": 0.003347306279465556, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.122029495192692e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.122029495192692e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2844557523727417, "signal/frontier_coverage_1/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_1/group_std_mean": 0.3530541956424713, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_10/centered_abs_mean": 0.2844557523727417, "signal/frontier_coverage_10/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_10/group_std_mean": 0.3530541956424713, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_15/centered_abs_mean": 0.2844557523727417, "signal/frontier_coverage_15/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_15/group_std_mean": 0.3530541956424713, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_20/centered_abs_mean": 0.2844557523727417, "signal/frontier_coverage_20/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_20/group_std_mean": 0.3530541956424713, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_25/centered_abs_mean": 0.2844557523727417, "signal/frontier_coverage_25/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_25/group_std_mean": 0.3530541956424713, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_5/centered_abs_mean": 0.2844557523727417, "signal/frontier_coverage_5/group_bin_occupancy": 0.9390625, "signal/frontier_coverage_5/group_std_mean": 0.3530541956424713, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005091757886111737, "signal/frontier_ece_reward/centered_abs_mean": 0.061374531686306, "signal/frontier_ece_reward/group_bin_occupancy": 0.8796875, "signal/frontier_ece_reward/group_std_mean": 0.08481642305850982, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00767181646078825, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00767181646078825, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27003463804721833, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75390625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3499366283416748, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03375432975590229, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03375432975590229, "step": 60 }, { "calibration/aurc": 0.38886185437113174, "calibration/batch_distribution_entropy": 0.993174546110124, "calibration/batch_entropy_100bins": 0.9755285743495123, "calibration/batch_entropy_10bins": 0.993174546110124, "calibration/batch_entropy_50bins": 0.9853249378435794, "calibration/batch_uniqueness": 0.9561890088248373, "calibration/buffer_distribution_entropy": 0.9696478610556747, "calibration/buffer_entropy_100bins": 0.9073566670205515, "calibration/buffer_entropy_10bins": 0.9696478610556747, "calibration/buffer_entropy_50bins": 0.9373040055132771, "calibration/confidence_entropy": 0.4993493040570547, "calibration/coverage@0%": 0.001954656862745098, "calibration/coverage@1%": 0.001954656862745098, "calibration/coverage@10%": 0.01878440245961398, "calibration/coverage@15%": 0.08023254335980967, "calibration/coverage@20%": 0.12720311240934729, "calibration/coverage@25%": 0.3667257827788649, "calibration/coverage@30%": 0.413671875, "calibration/coverage@5%": 0.001954656862745098, "calibration/ece": 0.23965065800221003, "calibration/mean_confidence": 0.4801674712232713, "calibration/prompt_uniqueness": 0.8904477757544225, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 1357.2, "completions/max_terminated_length": 678.2, "completions/mean_length": 154.43896484375, "completions/mean_terminated_length": 153.49268188476563, "completions/min_length": 62.0, "completions/min_terminated_length": 62.0, "epoch": 0.208, "grad_norm": 0.0015919266734272242, "learning_rate": 1e-06, "loss": 0.0019, "num_tokens": 215951643.0, "reward": 0.8326294660568238, "reward_std": 0.12973762750625611, "rewards/accuracy_reward": 0.519140625, "rewards/brier_reward": 0.7076422810554505, "rewards/format_reward": 0.9990234375, "rewards/frontier_aurc_reward": -0.003565392177551985, "rewards/frontier_coverage_1": 0.030470560118556023, "rewards/frontier_coverage_10": 0.030470560118556023, "rewards/frontier_coverage_15": 0.030470560118556023, "rewards/frontier_coverage_20": 0.030470560118556023, "rewards/frontier_coverage_25": 0.030470560118556023, "rewards/frontier_coverage_5": 0.030470560118556023, "rewards/frontier_ece_reward": 0.01095831673592329, "rewards/frontier_entropy_batch_reward": -0.15589092671871185, "signal/accuracy_reward/centered_abs_mean": 0.142822265625, "signal/accuracy_reward/group_bin_occupancy": 0.19453125, "signal/accuracy_reward/group_std_mean": 0.19114138782024384, "signal/accuracy_reward/group_zero_std_frac": 0.44375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0714111328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0714111328125, "signal/advantage_abs_mean": 0.10268731862306595, "signal/advantage_pre_scale_abs_mean": 0.10268731862306595, "signal/advantage_pre_scale_std": 0.1399999141693115, "signal/advantage_std": 0.1399999141693115, "signal/brier_reward/centered_abs_mean": 0.22231624722480775, "signal/brier_reward/group_bin_occupancy": 0.922265625, "signal/brier_reward/group_std_mean": 0.2717791020870209, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02778953090310097, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02778953090310097, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_bin_occupancy": 0.12890625, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002406009705737233, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7859375, "signal/frontier_aurc_reward/group_std_mean": 0.0035011733416467905, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3067572551080954e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3067572551080954e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.2699986696243286, "signal/frontier_coverage_1/group_bin_occupancy": 0.919921875, "signal/frontier_coverage_1/group_std_mean": 0.3401759326457977, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_10/centered_abs_mean": 0.2699986696243286, "signal/frontier_coverage_10/group_bin_occupancy": 0.919921875, "signal/frontier_coverage_10/group_std_mean": 0.3401759326457977, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_15/centered_abs_mean": 0.2699986696243286, "signal/frontier_coverage_15/group_bin_occupancy": 0.919921875, "signal/frontier_coverage_15/group_std_mean": 0.3401759326457977, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_20/centered_abs_mean": 0.2699986696243286, "signal/frontier_coverage_20/group_bin_occupancy": 0.919921875, "signal/frontier_coverage_20/group_std_mean": 0.3401759326457977, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_25/centered_abs_mean": 0.2699986696243286, "signal/frontier_coverage_25/group_bin_occupancy": 0.919921875, "signal/frontier_coverage_25/group_std_mean": 0.3401759326457977, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_5/centered_abs_mean": 0.2699986696243286, "signal/frontier_coverage_5/group_bin_occupancy": 0.919921875, "signal/frontier_coverage_5/group_std_mean": 0.3401759326457977, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004832976032048464, "signal/frontier_ece_reward/centered_abs_mean": 0.06127958670258522, "signal/frontier_ece_reward/group_bin_occupancy": 0.8984375, "signal/frontier_ece_reward/group_std_mean": 0.08330589979887008, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007659948337823153, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007659948337823153, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24359245598316193, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3275866687297821, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03044905699789524, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03044905699789524, "step": 65 }, { "calibration/aurc": 0.37149284629454293, "calibration/batch_distribution_entropy": 0.9931569295884003, "calibration/batch_entropy_100bins": 0.9783278813046771, "calibration/batch_entropy_10bins": 0.9931569295884003, "calibration/batch_entropy_50bins": 0.9868018130911993, "calibration/batch_uniqueness": 0.9561319831691417, "calibration/buffer_distribution_entropy": 0.9742613412980017, "calibration/buffer_entropy_100bins": 0.9193829808162708, "calibration/buffer_entropy_10bins": 0.9742613412980017, "calibration/buffer_entropy_50bins": 0.946153440671097, "calibration/confidence_entropy": 0.5044726683622951, "calibration/coverage@0%": 0.009770982022946165, "calibration/coverage@1%": 0.009770982022946165, "calibration/coverage@10%": 0.026184111915314067, "calibration/coverage@15%": 0.043773703402593914, "calibration/coverage@20%": 0.12471181494858216, "calibration/coverage@25%": 0.21463975444629907, "calibration/coverage@30%": 0.34204774975058516, "calibration/coverage@5%": 0.011333482022946165, "calibration/ece": 0.17022807751994243, "calibration/mean_confidence": 0.5015854712077894, "calibration/prompt_uniqueness": 0.8873234306350446, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1016.2, "completions/max_terminated_length": 697.0, "completions/mean_length": 160.7646484375, "completions/mean_terminated_length": 160.22604675292968, "completions/min_length": 67.8, "completions/min_terminated_length": 67.8, "epoch": 0.224, "grad_norm": 0.001383577473461628, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 232751057.0, "reward": 0.8240685939788819, "reward_std": 0.12576959878206254, "rewards/accuracy_reward": 0.480078125, "rewards/brier_reward": 0.7404986023902893, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0036113801877945663, "rewards/frontier_coverage_1": 0.08505139946937561, "rewards/frontier_coverage_10": 0.08505139946937561, "rewards/frontier_coverage_15": 0.08505139946937561, "rewards/frontier_coverage_20": 0.08505139946937561, "rewards/frontier_coverage_25": 0.08505139946937561, "rewards/frontier_coverage_5": 0.08505139946937561, "rewards/frontier_ece_reward": 0.015613408572971822, "rewards/frontier_entropy_batch_reward": -0.15448164641857148, "signal/accuracy_reward/centered_abs_mean": 0.13514404296875, "signal/accuracy_reward/group_bin_occupancy": 0.187890625, "signal/accuracy_reward/group_std_mean": 0.177346870303154, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.067572021484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.067572021484375, "signal/advantage_abs_mean": 0.09964470565319061, "signal/advantage_pre_scale_abs_mean": 0.09964470565319061, "signal/advantage_pre_scale_std": 0.13848601579666137, "signal/advantage_std": 0.13848601579666137, "signal/brier_reward/centered_abs_mean": 0.19449081718921662, "signal/brier_reward/group_bin_occupancy": 0.896875, "signal/brier_reward/group_std_mean": 0.24308900237083436, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024311352148652078, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.024311352148652078, "signal/format_reward/centered_abs_mean": 0.000933837890625, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0024258273653686045, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.002517683617770672, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7875, "signal/frontier_aurc_reward/group_std_mean": 0.0036455394700169565, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.506653422140516e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.506653422140516e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.23284452259540558, "signal/frontier_coverage_1/group_bin_occupancy": 0.909765625, "signal/frontier_coverage_1/group_std_mean": 0.29805226922035216, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_10/centered_abs_mean": 0.23284452259540558, "signal/frontier_coverage_10/group_bin_occupancy": 0.909765625, "signal/frontier_coverage_10/group_std_mean": 0.29805226922035216, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_15/centered_abs_mean": 0.23284452259540558, "signal/frontier_coverage_15/group_bin_occupancy": 0.909765625, "signal/frontier_coverage_15/group_std_mean": 0.29805226922035216, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_20/centered_abs_mean": 0.23284452259540558, "signal/frontier_coverage_20/group_bin_occupancy": 0.909765625, "signal/frontier_coverage_20/group_std_mean": 0.29805226922035216, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_25/centered_abs_mean": 0.23284452259540558, "signal/frontier_coverage_25/group_bin_occupancy": 0.909765625, "signal/frontier_coverage_25/group_std_mean": 0.29805226922035216, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_5/centered_abs_mean": 0.23284452259540558, "signal/frontier_coverage_5/group_bin_occupancy": 0.909765625, "signal/frontier_coverage_5/group_std_mean": 0.29805226922035216, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041679169051349165, "signal/frontier_ece_reward/centered_abs_mean": 0.0579533688724041, "signal/frontier_ece_reward/group_bin_occupancy": 0.900390625, "signal/frontier_ece_reward/group_std_mean": 0.07829077690839767, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007244171109050512, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007244171109050512, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24184339344501496, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.758984375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3229557752609253, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03023042418062687, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03023042418062687, "step": 70 }, { "calibration/aurc": 0.3843023569473395, "calibration/batch_distribution_entropy": 0.9825564335986041, "calibration/batch_entropy_100bins": 0.9674868151684599, "calibration/batch_entropy_10bins": 0.9825564335986041, "calibration/batch_entropy_50bins": 0.9772102320227738, "calibration/batch_uniqueness": 0.9530455243124984, "calibration/buffer_distribution_entropy": 0.9776003008409667, "calibration/buffer_entropy_100bins": 0.9292828890688549, "calibration/buffer_entropy_10bins": 0.9776003008409667, "calibration/buffer_entropy_50bins": 0.9531789644661673, "calibration/confidence_entropy": 0.5097281333912452, "calibration/coverage@0%": 0.01525272137964775, "calibration/coverage@1%": 0.01525272137964775, "calibration/coverage@10%": 0.054781525195694715, "calibration/coverage@15%": 0.10409047822896281, "calibration/coverage@20%": 0.1984008072407045, "calibration/coverage@25%": 0.242578125, "calibration/coverage@30%": 0.265234375, "calibration/coverage@5%": 0.017209668542074362, "calibration/ece": 0.17762837478013666, "calibration/mean_confidence": 0.5141673460767282, "calibration/prompt_uniqueness": 0.8842985822060353, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 882.2, "completions/max_terminated_length": 478.2, "completions/mean_length": 172.72275390625, "completions/mean_terminated_length": 172.45663757324218, "completions/min_length": 72.0, "completions/min_terminated_length": 72.0, "epoch": 0.24, "grad_norm": 0.0014637865824624896, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 249771418.0, "reward": 0.8475449323654175, "reward_std": 0.126870197057724, "rewards/accuracy_reward": 0.54345703125, "rewards/brier_reward": 0.7408711910247803, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0031028116587549447, "rewards/frontier_coverage_1": 0.043854419514536856, "rewards/frontier_coverage_10": 0.043854419514536856, "rewards/frontier_coverage_15": 0.043854419514536856, "rewards/frontier_coverage_20": 0.043854419514536856, "rewards/frontier_coverage_25": 0.043854419514536856, "rewards/frontier_coverage_5": 0.043854419514536856, "rewards/frontier_ece_reward": 0.018962536379694937, "rewards/frontier_entropy_batch_reward": -0.1885848104953766, "signal/accuracy_reward/centered_abs_mean": 0.143426513671875, "signal/accuracy_reward/group_bin_occupancy": 0.1921875, "signal/accuracy_reward/group_std_mean": 0.18838266730308534, "signal/accuracy_reward/group_zero_std_frac": 0.4625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0717132568359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0717132568359375, "signal/advantage_abs_mean": 0.10101936310529709, "signal/advantage_pre_scale_abs_mean": 0.10101936310529709, "signal/advantage_pre_scale_std": 0.14047700464725493, "signal/advantage_std": 0.14047700464725493, "signal/brier_reward/centered_abs_mean": 0.19066874384880067, "signal/brier_reward/group_bin_occupancy": 0.9046875, "signal/brier_reward/group_std_mean": 0.23766070902347564, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023833592981100084, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.023833592981100084, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002466377941891551, "signal/frontier_aurc_reward/group_bin_occupancy": 0.798046875, "signal/frontier_aurc_reward/group_std_mean": 0.0035321788396686315, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4148163578938696e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4148163578938696e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22646748423576354, "signal/frontier_coverage_1/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_1/group_std_mean": 0.2924614608287811, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_10/centered_abs_mean": 0.22646748423576354, "signal/frontier_coverage_10/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_10/group_std_mean": 0.2924614608287811, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_15/centered_abs_mean": 0.22646748423576354, "signal/frontier_coverage_15/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_15/group_std_mean": 0.2924614608287811, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_20/centered_abs_mean": 0.22646748423576354, "signal/frontier_coverage_20/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_20/group_std_mean": 0.2924614608287811, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_25/centered_abs_mean": 0.22646748423576354, "signal/frontier_coverage_25/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_25/group_std_mean": 0.2924614608287811, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_5/centered_abs_mean": 0.22646748423576354, "signal/frontier_coverage_5/group_bin_occupancy": 0.902734375, "signal/frontier_coverage_5/group_std_mean": 0.2924614608287811, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004053767677396536, "signal/frontier_ece_reward/centered_abs_mean": 0.05426667183637619, "signal/frontier_ece_reward/group_bin_occupancy": 0.887890625, "signal/frontier_ece_reward/group_std_mean": 0.07379693686962127, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006783333979547024, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006783333979547024, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2799087405204773, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35980047583580016, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034988592565059665, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034988592565059665, "step": 75 }, { "calibration/aurc": 0.29483841819292744, "calibration/batch_distribution_entropy": 0.9777187935073307, "calibration/batch_entropy_100bins": 0.9667154925944665, "calibration/batch_entropy_10bins": 0.9777187935073307, "calibration/batch_entropy_50bins": 0.9757633762213546, "calibration/batch_uniqueness": 0.9522694542284718, "calibration/buffer_distribution_entropy": 0.9800587855542565, "calibration/buffer_entropy_100bins": 0.9373105642982278, "calibration/buffer_entropy_10bins": 0.9800587855542565, "calibration/buffer_entropy_50bins": 0.958673701749106, "calibration/confidence_entropy": 0.4694770219979284, "calibration/coverage@0%": 0.016024798189823875, "calibration/coverage@1%": 0.016024798189823875, "calibration/coverage@10%": 0.12074134662426614, "calibration/coverage@15%": 0.23877048679060664, "calibration/coverage@20%": 0.35218704134050877, "calibration/coverage@25%": 0.449902917074364, "calibration/coverage@30%": 0.5347044704011742, "calibration/coverage@5%": 0.016806048189823876, "calibration/ece": 0.145108984662015, "calibration/mean_confidence": 0.4939348278541888, "calibration/prompt_uniqueness": 0.8741655522405047, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 939.4, "completions/max_terminated_length": 736.8, "completions/mean_length": 176.50556640625, "completions/mean_terminated_length": 176.3722412109375, "completions/min_length": 82.2, "completions/min_terminated_length": 82.2, "epoch": 0.256, "grad_norm": 0.0011565532768145204, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 266633651.0, "reward": 0.8428894639015198, "reward_std": 0.11935372054576873, "rewards/accuracy_reward": 0.51728515625, "rewards/brier_reward": 0.7649436235427857, "rewards/format_reward": 0.9994140625, "rewards/frontier_aurc_reward": -0.002899883547797799, "rewards/frontier_coverage_1": 0.09639933593571186, "rewards/frontier_coverage_10": 0.09639933593571186, "rewards/frontier_coverage_15": 0.09639933593571186, "rewards/frontier_coverage_20": 0.09639933593571186, "rewards/frontier_coverage_25": 0.09639933593571186, "rewards/frontier_coverage_5": 0.09639933593571186, "rewards/frontier_ece_reward": 0.023540638387203217, "rewards/frontier_entropy_batch_reward": -0.19457639753818512, "signal/accuracy_reward/centered_abs_mean": 0.130194091796875, "signal/accuracy_reward/group_bin_occupancy": 0.186328125, "signal/accuracy_reward/group_std_mean": 0.17149793207645417, "signal/accuracy_reward/group_zero_std_frac": 0.509375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0650970458984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0650970458984375, "signal/advantage_abs_mean": 0.0930386334657669, "signal/advantage_pre_scale_abs_mean": 0.0930386334657669, "signal/advantage_pre_scale_std": 0.13454234898090361, "signal/advantage_std": 0.13454234898090361, "signal/brier_reward/centered_abs_mean": 0.17940108776092528, "signal/brier_reward/group_bin_occupancy": 0.879296875, "signal/brier_reward/group_std_mean": 0.2269110530614853, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02242513597011566, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02242513597011566, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_bin_occupancy": 0.12734375, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024126087315380572, "signal/frontier_aurc_reward/group_bin_occupancy": 0.773046875, "signal/frontier_aurc_reward/group_std_mean": 0.003546612523496151, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.318569772294722e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.318569772294722e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22456566095352173, "signal/frontier_coverage_1/group_bin_occupancy": 0.897265625, "signal/frontier_coverage_1/group_std_mean": 0.28588892221450807, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_10/centered_abs_mean": 0.22456566095352173, "signal/frontier_coverage_10/group_bin_occupancy": 0.897265625, "signal/frontier_coverage_10/group_std_mean": 0.28588892221450807, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_15/centered_abs_mean": 0.22456566095352173, "signal/frontier_coverage_15/group_bin_occupancy": 0.897265625, "signal/frontier_coverage_15/group_std_mean": 0.28588892221450807, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_20/centered_abs_mean": 0.22456566095352173, "signal/frontier_coverage_20/group_bin_occupancy": 0.897265625, "signal/frontier_coverage_20/group_std_mean": 0.28588892221450807, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_25/centered_abs_mean": 0.22456566095352173, "signal/frontier_coverage_25/group_bin_occupancy": 0.897265625, "signal/frontier_coverage_25/group_std_mean": 0.28588892221450807, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_5/centered_abs_mean": 0.22456566095352173, "signal/frontier_coverage_5/group_bin_occupancy": 0.897265625, "signal/frontier_coverage_5/group_std_mean": 0.28588892221450807, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004019725229591131, "signal/frontier_ece_reward/centered_abs_mean": 0.050846466422080995, "signal/frontier_ece_reward/group_bin_occupancy": 0.88046875, "signal/frontier_ece_reward/group_std_mean": 0.06863305419683456, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006355808302760124, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006355808302760124, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27906052470207215, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74765625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3593753814697266, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03488256558775902, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03488256558775902, "step": 80 }, { "calibration/aurc": 0.396818313806098, "calibration/batch_distribution_entropy": 0.9904097057091761, "calibration/batch_entropy_100bins": 0.9737173977651677, "calibration/batch_entropy_10bins": 0.9904097057091761, "calibration/batch_entropy_50bins": 0.9832208433275686, "calibration/batch_uniqueness": 0.9558441162109375, "calibration/buffer_distribution_entropy": 0.9823054908036963, "calibration/buffer_entropy_100bins": 0.9438449914736629, "calibration/buffer_entropy_10bins": 0.9823054908036963, "calibration/buffer_entropy_50bins": 0.9631353066861775, "calibration/confidence_entropy": 0.48614395905828134, "calibration/coverage@0%": 0.010546875, "calibration/coverage@1%": 0.010546875, "calibration/coverage@10%": 0.047265625, "calibration/coverage@15%": 0.06640625, "calibration/coverage@20%": 0.16875, "calibration/coverage@25%": 0.22734375, "calibration/coverage@30%": 0.33125, "calibration/coverage@5%": 0.01328125, "calibration/ece": 0.14752605867831683, "calibration/mean_confidence": 0.49425239867748705, "calibration/prompt_uniqueness": 0.879052734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 687.0, "completions/max_terminated_length": 687.0, "completions/mean_length": 186.03525390625, "completions/mean_terminated_length": 186.03525390625, "completions/min_length": 78.2, "completions/min_terminated_length": 78.2, "epoch": 0.272, "grad_norm": 0.0011228998191654682, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 283504348.0, "reward": 0.8343194007873536, "reward_std": 0.1147305577993393, "rewards/accuracy_reward": 0.49560546875, "rewards/brier_reward": 0.7591516852378846, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0030454121064394713, "rewards/frontier_coverage_1": 0.1056124085560441, "rewards/frontier_coverage_10": 0.1056124085560441, "rewards/frontier_coverage_15": 0.1056124085560441, "rewards/frontier_coverage_20": 0.1056124085560441, "rewards/frontier_coverage_25": 0.1056124085560441, "rewards/frontier_coverage_5": 0.1056124085560441, "rewards/frontier_ece_reward": 0.019984208419919013, "rewards/frontier_entropy_batch_reward": -0.17652736306190492, "signal/accuracy_reward/centered_abs_mean": 0.127191162109375, "signal/accuracy_reward/group_bin_occupancy": 0.183984375, "signal/accuracy_reward/group_std_mean": 0.16545215547084807, "signal/accuracy_reward/group_zero_std_frac": 0.528125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0635955810546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0635955810546875, "signal/advantage_abs_mean": 0.09112635999917984, "signal/advantage_pre_scale_abs_mean": 0.09112635999917984, "signal/advantage_pre_scale_std": 0.1298075333237648, "signal/advantage_std": 0.1298075333237648, "signal/brier_reward/centered_abs_mean": 0.17884210646152496, "signal/brier_reward/group_bin_occupancy": 0.869140625, "signal/brier_reward/group_std_mean": 0.22582717537879943, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02235526330769062, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.02235526330769062, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023218464106321336, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7765625, "signal/frontier_aurc_reward/group_std_mean": 0.003383269626647234, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1561049147276205e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1561049147276205e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.22928011119365693, "signal/frontier_coverage_1/group_bin_occupancy": 0.894921875, "signal/frontier_coverage_1/group_std_mean": 0.2927806079387665, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_10/centered_abs_mean": 0.22928011119365693, "signal/frontier_coverage_10/group_bin_occupancy": 0.894921875, "signal/frontier_coverage_10/group_std_mean": 0.2927806079387665, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_15/centered_abs_mean": 0.22928011119365693, "signal/frontier_coverage_15/group_bin_occupancy": 0.894921875, "signal/frontier_coverage_15/group_std_mean": 0.2927806079387665, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_20/centered_abs_mean": 0.22928011119365693, "signal/frontier_coverage_20/group_bin_occupancy": 0.894921875, "signal/frontier_coverage_20/group_std_mean": 0.2927806079387665, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_25/centered_abs_mean": 0.22928011119365693, "signal/frontier_coverage_25/group_bin_occupancy": 0.894921875, "signal/frontier_coverage_25/group_std_mean": 0.2927806079387665, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_5/centered_abs_mean": 0.22928011119365693, "signal/frontier_coverage_5/group_bin_occupancy": 0.894921875, "signal/frontier_coverage_5/group_std_mean": 0.2927806079387665, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004104113671928644, "signal/frontier_ece_reward/centered_abs_mean": 0.04553831294178963, "signal/frontier_ece_reward/group_bin_occupancy": 0.86875, "signal/frontier_ece_reward/group_std_mean": 0.0625507190823555, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005692289117723704, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005692289117723704, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.262398362159729, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34030004143714904, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032799795269966125, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032799795269966125, "step": 85 }, { "calibration/aurc": 0.3438739028381422, "calibration/batch_distribution_entropy": 0.9872127090769016, "calibration/batch_entropy_100bins": 0.9745401927062021, "calibration/batch_entropy_10bins": 0.9872127090769016, "calibration/batch_entropy_50bins": 0.9833082642512846, "calibration/batch_uniqueness": 0.955206298828125, "calibration/buffer_distribution_entropy": 0.9842534881328724, "calibration/buffer_entropy_100bins": 0.9495188377364201, "calibration/buffer_entropy_10bins": 0.9842534881328724, "calibration/buffer_entropy_50bins": 0.9670404366119122, "calibration/confidence_entropy": 0.4957282055397302, "calibration/coverage@0%": 0.0046875, "calibration/coverage@1%": 0.0046875, "calibration/coverage@10%": 0.06484375, "calibration/coverage@15%": 0.106640625, "calibration/coverage@20%": 0.14296875, "calibration/coverage@25%": 0.212890625, "calibration/coverage@30%": 0.2953125, "calibration/coverage@5%": 0.01953125, "calibration/ece": 0.13413243536778918, "calibration/mean_confidence": 0.5377243429204241, "calibration/prompt_uniqueness": 0.878662109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 754.2, "completions/max_terminated_length": 565.8, "completions/mean_length": 182.9107421875, "completions/mean_terminated_length": 182.64695739746094, "completions/min_length": 80.4, "completions/min_terminated_length": 80.4, "epoch": 0.288, "grad_norm": 0.0012018464040011168, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 300335530.0, "reward": 0.8439226388931275, "reward_std": 0.11675801277160644, "rewards/accuracy_reward": 0.51513671875, "rewards/brier_reward": 0.7593789458274841, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.0030923429410904648, "rewards/frontier_coverage_1": 0.10312287509441376, "rewards/frontier_coverage_10": 0.10312287509441376, "rewards/frontier_coverage_15": 0.10312287509441376, "rewards/frontier_coverage_20": 0.10312287509441376, "rewards/frontier_coverage_25": 0.10312287509441376, "rewards/frontier_coverage_5": 0.10312287509441376, "rewards/frontier_ece_reward": 0.01944323191419244, "rewards/frontier_entropy_batch_reward": -0.1730232924222946, "signal/accuracy_reward/centered_abs_mean": 0.134979248046875, "signal/accuracy_reward/group_bin_occupancy": 0.1890625, "signal/accuracy_reward/group_std_mean": 0.17875251770019532, "signal/accuracy_reward/group_zero_std_frac": 0.4875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0674896240234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0674896240234375, "signal/advantage_abs_mean": 0.0906929224729538, "signal/advantage_pre_scale_abs_mean": 0.0906929224729538, "signal/advantage_pre_scale_std": 0.13175700902938842, "signal/advantage_std": 0.13175700902938842, "signal/brier_reward/centered_abs_mean": 0.17308151721954346, "signal/brier_reward/group_bin_occupancy": 0.875, "signal/brier_reward/group_std_mean": 0.2171693116426468, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021635189652442932, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.021635189652442932, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_bin_occupancy": 0.128125, "signal/format_reward/group_std_mean": 0.004419417353346944, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_aurc_reward/centered_abs_mean": 0.002408099686726928, "signal/frontier_aurc_reward/group_bin_occupancy": 0.76796875, "signal/frontier_aurc_reward/group_std_mean": 0.0035015761386603117, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.310498406994157e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.310498406994157e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.21902143955230713, "signal/frontier_coverage_1/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_1/group_std_mean": 0.27995782494544985, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_10/centered_abs_mean": 0.21902143955230713, "signal/frontier_coverage_10/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_10/group_std_mean": 0.27995782494544985, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_15/centered_abs_mean": 0.21902143955230713, "signal/frontier_coverage_15/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_15/group_std_mean": 0.27995782494544985, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_20/centered_abs_mean": 0.21902143955230713, "signal/frontier_coverage_20/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_20/group_std_mean": 0.27995782494544985, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_25/centered_abs_mean": 0.21902143955230713, "signal/frontier_coverage_25/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_25/group_std_mean": 0.27995782494544985, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_5/centered_abs_mean": 0.21902143955230713, "signal/frontier_coverage_5/group_bin_occupancy": 0.885546875, "signal/frontier_coverage_5/group_std_mean": 0.27995782494544985, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003920483542606235, "signal/frontier_ece_reward/centered_abs_mean": 0.043396206200122835, "signal/frontier_ece_reward/group_bin_occupancy": 0.865625, "signal/frontier_ece_reward/group_std_mean": 0.05951143801212311, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005424525775015354, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005424525775015354, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2536126673221588, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7515625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.330656635761261, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03170158341526985, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03170158341526985, "step": 90 }, { "calibration/aurc": 0.30520045908732824, "calibration/batch_distribution_entropy": 0.9809410439481454, "calibration/batch_entropy_100bins": 0.9710889487139797, "calibration/batch_entropy_10bins": 0.9809410439481454, "calibration/batch_entropy_50bins": 0.979682041413058, "calibration/batch_uniqueness": 0.9534674290791777, "calibration/buffer_distribution_entropy": 0.9852726079888707, "calibration/buffer_entropy_100bins": 0.9542494347166681, "calibration/buffer_entropy_10bins": 0.9852726079888707, "calibration/buffer_entropy_50bins": 0.970168674026028, "calibration/confidence_entropy": 0.487046948757095, "calibration/coverage@0%": 0.00703660102739726, "calibration/coverage@1%": 0.00703660102739726, "calibration/coverage@10%": 0.06920177959882583, "calibration/coverage@15%": 0.11181124633072406, "calibration/coverage@20%": 0.20913420376712327, "calibration/coverage@25%": 0.3654484160958904, "calibration/coverage@30%": 0.5100178877201565, "calibration/coverage@5%": 0.00703660102739726, "calibration/ece": 0.11105027567237662, "calibration/mean_confidence": 0.5574284681401743, "calibration/prompt_uniqueness": 0.8712910948881373, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 663.8, "completions/max_terminated_length": 663.8, "completions/mean_length": 184.992578125, "completions/mean_terminated_length": 184.992578125, "completions/min_length": 77.6, "completions/min_terminated_length": 77.6, "epoch": 0.304, "grad_norm": 0.0013152319006621838, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 317159806.0, "reward": 0.8400702953338623, "reward_std": 0.11409472972154618, "rewards/accuracy_reward": 0.50537109375, "rewards/brier_reward": 0.7638264536857605, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0030908068176358936, "rewards/frontier_coverage_1": 0.1076532706618309, "rewards/frontier_coverage_10": 0.1076532706618309, "rewards/frontier_coverage_15": 0.1076532706618309, "rewards/frontier_coverage_20": 0.1076532706618309, "rewards/frontier_coverage_25": 0.1076532706618309, "rewards/frontier_coverage_5": 0.1076532706618309, "rewards/frontier_ece_reward": 0.019258670136332513, "rewards/frontier_entropy_batch_reward": -0.1748884290456772, "signal/accuracy_reward/centered_abs_mean": 0.126800537109375, "signal/accuracy_reward/group_bin_occupancy": 0.186328125, "signal/accuracy_reward/group_std_mean": 0.1693242758512497, "signal/accuracy_reward/group_zero_std_frac": 0.509375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0634002685546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0634002685546875, "signal/advantage_abs_mean": 0.08780478239059449, "signal/advantage_pre_scale_abs_mean": 0.08780478239059449, "signal/advantage_pre_scale_std": 0.1284501165151596, "signal/advantage_std": 0.1284501165151596, "signal/brier_reward/centered_abs_mean": 0.1634564906358719, "signal/brier_reward/group_bin_occupancy": 0.869140625, "signal/brier_reward/group_std_mean": 0.207411727309227, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020432061329483987, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.020432061329483987, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002385811135172844, "signal/frontier_aurc_reward/group_bin_occupancy": 0.784375, "signal/frontier_aurc_reward/group_std_mean": 0.0034592232666909696, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2706017120508476e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2706017120508476e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20289005041122438, "signal/frontier_coverage_1/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_1/group_std_mean": 0.26228512823581696, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_10/centered_abs_mean": 0.20289005041122438, "signal/frontier_coverage_10/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_10/group_std_mean": 0.26228512823581696, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_15/centered_abs_mean": 0.20289005041122438, "signal/frontier_coverage_15/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_15/group_std_mean": 0.26228512823581696, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_20/centered_abs_mean": 0.20289005041122438, "signal/frontier_coverage_20/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_20/group_std_mean": 0.26228512823581696, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_25/centered_abs_mean": 0.20289005041122438, "signal/frontier_coverage_25/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_25/group_std_mean": 0.26228512823581696, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_5/centered_abs_mean": 0.20289005041122438, "signal/frontier_coverage_5/group_bin_occupancy": 0.879296875, "signal/frontier_coverage_5/group_std_mean": 0.26228512823581696, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003631731867790222, "signal/frontier_ece_reward/centered_abs_mean": 0.041925042122602466, "signal/frontier_ece_reward/group_bin_occupancy": 0.84453125, "signal/frontier_ece_reward/group_std_mean": 0.05757189467549324, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005240630265325308, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005240630265325308, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25924491286277773, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.754296875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3380768716335297, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032405614107847217, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032405614107847217, "step": 95 }, { "calibration/aurc": 0.2647507481106945, "calibration/batch_distribution_entropy": 0.9905705255561216, "calibration/batch_entropy_100bins": 0.9749278642951309, "calibration/batch_entropy_10bins": 0.9905705255561216, "calibration/batch_entropy_50bins": 0.9843588547973605, "calibration/batch_uniqueness": 0.9554248612111863, "calibration/buffer_distribution_entropy": 0.9859378110837035, "calibration/buffer_entropy_100bins": 0.9583101311689097, "calibration/buffer_entropy_10bins": 0.9859378110837035, "calibration/buffer_entropy_50bins": 0.9726730927304548, "calibration/confidence_entropy": 0.48981557023424893, "calibration/coverage@0%": 0.0453491927592955, "calibration/coverage@1%": 0.0453491927592955, "calibration/coverage@10%": 0.24703170865949117, "calibration/coverage@15%": 0.34710127201565555, "calibration/coverage@20%": 0.41707207069471625, "calibration/coverage@25%": 0.49878913894324856, "calibration/coverage@30%": 0.572265625, "calibration/coverage@5%": 0.09306124633072407, "calibration/ece": 0.1539305622574378, "calibration/mean_confidence": 0.5290979156854486, "calibration/prompt_uniqueness": 0.8721267620805151, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 890.6, "completions/max_terminated_length": 481.2, "completions/mean_length": 185.70908203125, "completions/mean_terminated_length": 185.3141632080078, "completions/min_length": 92.6, "completions/min_terminated_length": 92.6, "epoch": 0.32, "grad_norm": 0.0009060048614628613, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 334150171.0, "reward": 0.8524771690368652, "reward_std": 0.09913994669914246, "rewards/accuracy_reward": 0.5279296875, "rewards/brier_reward": 0.7727201581001282, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.00271282319445163, "rewards/frontier_coverage_1": 0.09441364500671626, "rewards/frontier_coverage_10": 0.09441364500671626, "rewards/frontier_coverage_15": 0.09441364500671626, "rewards/frontier_coverage_20": 0.09441364500671626, "rewards/frontier_coverage_25": 0.09441364500671626, "rewards/frontier_coverage_5": 0.09441364500671626, "rewards/frontier_ece_reward": 0.02004805374890566, "rewards/frontier_entropy_batch_reward": -0.16383886635303496, "signal/accuracy_reward/centered_abs_mean": 0.0960205078125, "signal/accuracy_reward/group_bin_occupancy": 0.178125, "signal/accuracy_reward/group_std_mean": 0.13594979792833328, "signal/accuracy_reward/group_zero_std_frac": 0.575, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04801025390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04801025390625, "signal/advantage_abs_mean": 0.07516542375087738, "signal/advantage_pre_scale_abs_mean": 0.07516542375087738, "signal/advantage_pre_scale_std": 0.11188042908906937, "signal/advantage_std": 0.11188042908906937, "signal/brier_reward/centered_abs_mean": 0.15651972889900206, "signal/brier_reward/group_bin_occupancy": 0.848828125, "signal/brier_reward/group_std_mean": 0.20049535632133483, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019564966112375258, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019564966112375258, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002122000069357455, "signal/frontier_aurc_reward/group_bin_occupancy": 0.769921875, "signal/frontier_aurc_reward/group_std_mean": 0.003134680772200227, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.79838005756028e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.79838005756028e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19597499668598176, "signal/frontier_coverage_1/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_1/group_std_mean": 0.25266251862049105, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_10/centered_abs_mean": 0.19597499668598176, "signal/frontier_coverage_10/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_10/group_std_mean": 0.25266251862049105, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_15/centered_abs_mean": 0.19597499668598176, "signal/frontier_coverage_15/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_15/group_std_mean": 0.25266251862049105, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_20/centered_abs_mean": 0.19597499668598176, "signal/frontier_coverage_20/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_20/group_std_mean": 0.25266251862049105, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_25/centered_abs_mean": 0.19597499668598176, "signal/frontier_coverage_25/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_25/group_std_mean": 0.25266251862049105, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_5/centered_abs_mean": 0.19597499668598176, "signal/frontier_coverage_5/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_5/group_std_mean": 0.25266251862049105, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035079522524029015, "signal/frontier_ece_reward/centered_abs_mean": 0.038548742234706876, "signal/frontier_ece_reward/group_bin_occupancy": 0.83828125, "signal/frontier_ece_reward/group_std_mean": 0.05287352129817009, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048185927793383595, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048185927793383595, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24952644407749175, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740234375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32922094464302065, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03119080550968647, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03119080550968647, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.5577346842025488, "eval_calibration/batch_distribution_entropy": 0.9293753890186642, "eval_calibration/batch_entropy_100bins": 0.7052278361140917, "eval_calibration/batch_entropy_10bins": 0.9293753890186642, "eval_calibration/batch_entropy_50bins": 0.7897002923025568, "eval_calibration/batch_uniqueness": 0.8984375, "eval_calibration/buffer_distribution_entropy": 0.9865955502836785, "eval_calibration/buffer_entropy_100bins": 0.9606136359667189, "eval_calibration/buffer_entropy_10bins": 0.9865955502836785, "eval_calibration/buffer_entropy_50bins": 0.9741520355969424, "eval_calibration/confidence_entropy": 0.49245299957964217, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.046875, "eval_calibration/coverage@25%": 0.0625, "eval_calibration/coverage@30%": 0.0625, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.2602928906962808, "eval_calibration/mean_confidence": 0.4507762260644716, "eval_calibration/prompt_uniqueness": 0.8984375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 387.5, "eval_completions/max_terminated_length": 387.5, "eval_completions/mean_length": 186.24878692626953, "eval_completions/mean_terminated_length": 186.24878692626953, "eval_completions/min_length": 94.5, "eval_completions/min_terminated_length": 94.5, "eval_loss": 0.0, "eval_num_tokens": 334150171.0, "eval_reward": 0.6920952647924423, "eval_reward_std": 0.21343515813350677, "eval_rewards/accuracy_reward": 0.400390625, "eval_rewards/brier_reward": 0.76617431640625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0032768649398349226, "eval_rewards/frontier_coverage_1": 0.17960688099265099, "eval_rewards/frontier_coverage_10": 0.17960688099265099, "eval_rewards/frontier_coverage_15": 0.17960688099265099, "eval_rewards/frontier_coverage_20": 0.17960688099265099, "eval_rewards/frontier_coverage_25": 0.17960688099265099, "eval_rewards/frontier_coverage_5": 0.17960688099265099, "eval_rewards/frontier_ece_reward": 0.015176349552348256, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 20.5399, "eval_samples_per_second": 24.343, "eval_signal/accuracy_reward/centered_abs_mean": 0.4666748046875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49022945761680603, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23333740234375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23333740234375, "eval_signal/advantage_abs_mean": 0.19233160465955734, "eval_signal/advantage_pre_scale_abs_mean": 0.19233160465955734, "eval_signal/advantage_pre_scale_std": 0.21115415170788765, "eval_signal/advantage_std": 0.21115415170788765, "eval_signal/brier_reward/centered_abs_mean": 0.20825786143541336, "eval_signal/brier_reward/group_bin_occupancy": 0.9296875, "eval_signal/brier_reward/group_std_mean": 0.2583780698478222, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02603223267942667, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02603223267942667, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002897722239140421, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8359375, "eval_signal/frontier_aurc_reward/group_std_mean": 0.0046058918233029544, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.186922862776555e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.186922862776555e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.39053118973970413, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_1/group_std_mean": 0.47852831333875656, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.39053118973970413, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_10/group_std_mean": 0.47852831333875656, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.39053118973970413, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_15/group_std_mean": 0.47852831333875656, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.39053118973970413, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_20/group_std_mean": 0.47852831333875656, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.39053118973970413, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.47852831333875656, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.39053118973970413, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_5/group_std_mean": 0.47852831333875656, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0069905080599710345, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.03973545506596565, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.7734375, "eval_signal/frontier_ece_reward/group_std_mean": 0.06233951635658741, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0049669318832457066, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0049669318832457066, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.195, "step": 100 }, { "calibration/aurc": 0.3080561844853652, "calibration/batch_distribution_entropy": 0.9775665099409345, "calibration/batch_entropy_100bins": 0.9680939529103568, "calibration/batch_entropy_10bins": 0.9775665099409345, "calibration/batch_entropy_50bins": 0.9760559175224444, "calibration/batch_uniqueness": 0.953204345703125, "calibration/buffer_distribution_entropy": 0.9886758278700819, "calibration/buffer_entropy_100bins": 0.9646465788301135, "calibration/buffer_entropy_10bins": 0.9886758278700819, "calibration/buffer_entropy_50bins": 0.9770686160222896, "calibration/confidence_entropy": 0.5016109504275073, "calibration/coverage@0%": 0.005078125, "calibration/coverage@1%": 0.005078125, "calibration/coverage@10%": 0.04765625, "calibration/coverage@15%": 0.087109375, "calibration/coverage@20%": 0.164453125, "calibration/coverage@25%": 0.394921875, "calibration/coverage@30%": 0.550390625, "calibration/coverage@5%": 0.005078125, "calibration/ece": 0.14772412802918822, "calibration/mean_confidence": 0.44700384713572605, "calibration/prompt_uniqueness": 0.86708984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 693.6, "completions/max_terminated_length": 462.4, "completions/mean_length": 185.40810546875, "completions/mean_terminated_length": 185.27640075683593, "completions/min_length": 85.0, "completions/min_terminated_length": 85.0, "epoch": 0.336, "grad_norm": 0.0010472203139215708, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 350771182.0, "reward": 0.8507241845130921, "reward_std": 0.10451295822858811, "rewards/accuracy_reward": 0.53642578125, "rewards/brier_reward": 0.7645717978477478, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0025647633709013464, "rewards/frontier_coverage_1": 0.0841044221073389, "rewards/frontier_coverage_10": 0.0841044221073389, "rewards/frontier_coverage_15": 0.0841044221073389, "rewards/frontier_coverage_20": 0.0841044221073389, "rewards/frontier_coverage_25": 0.0841044221073389, "rewards/frontier_coverage_5": 0.0841044221073389, "rewards/frontier_ece_reward": 0.017082036286592484, "rewards/frontier_entropy_batch_reward": -0.1930681586265564, "signal/accuracy_reward/centered_abs_mean": 0.114898681640625, "signal/accuracy_reward/group_bin_occupancy": 0.18046875, "signal/accuracy_reward/group_std_mean": 0.1537907287478447, "signal/accuracy_reward/group_zero_std_frac": 0.55625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574493408203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0574493408203125, "signal/advantage_abs_mean": 0.08221976161003113, "signal/advantage_pre_scale_abs_mean": 0.08221976161003113, "signal/advantage_pre_scale_std": 0.11908840835094452, "signal/advantage_std": 0.11908840835094452, "signal/brier_reward/centered_abs_mean": 0.15783025622367858, "signal/brier_reward/group_bin_occupancy": 0.868359375, "signal/brier_reward/group_std_mean": 0.1996555894613266, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019728782027959822, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.019728782027959822, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.001953614945523441, "signal/frontier_aurc_reward/group_bin_occupancy": 0.771875, "signal/frontier_aurc_reward/group_std_mean": 0.0028803437016904354, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.496970675769262e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.496970675769262e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20813391208648682, "signal/frontier_coverage_1/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_1/group_std_mean": 0.2666136801242828, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_10/centered_abs_mean": 0.20813391208648682, "signal/frontier_coverage_10/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_10/group_std_mean": 0.2666136801242828, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_15/centered_abs_mean": 0.20813391208648682, "signal/frontier_coverage_15/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_15/group_std_mean": 0.2666136801242828, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_20/centered_abs_mean": 0.20813391208648682, "signal/frontier_coverage_20/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_20/group_std_mean": 0.2666136801242828, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_25/centered_abs_mean": 0.20813391208648682, "signal/frontier_coverage_25/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_25/group_std_mean": 0.2666136801242828, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_5/centered_abs_mean": 0.20813391208648682, "signal/frontier_coverage_5/group_bin_occupancy": 0.8890625, "signal/frontier_coverage_5/group_std_mean": 0.2666136801242828, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003725596936419606, "signal/frontier_ece_reward/centered_abs_mean": 0.033592797070741656, "signal/frontier_ece_reward/group_bin_occupancy": 0.826171875, "signal/frontier_ece_reward/group_std_mean": 0.0477225124835968, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004199099633842707, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004199099633842707, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27311921715736387, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35031378269195557, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034139902144670484, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034139902144670484, "step": 105 }, { "calibration/aurc": 0.3112573154501758, "calibration/batch_distribution_entropy": 0.9691682173397554, "calibration/batch_entropy_100bins": 0.9620743358290158, "calibration/batch_entropy_10bins": 0.9691682173397554, "calibration/batch_entropy_50bins": 0.968651729542121, "calibration/batch_uniqueness": 0.9505657025562766, "calibration/buffer_distribution_entropy": 0.9942777995898278, "calibration/buffer_entropy_100bins": 0.9773292841249249, "calibration/buffer_entropy_10bins": 0.9942777995898278, "calibration/buffer_entropy_50bins": 0.9859843966922479, "calibration/confidence_entropy": 0.47586445065632177, "calibration/coverage@0%": 0.03093057514101531, "calibration/coverage@1%": 0.03093057514101531, "calibration/coverage@10%": 0.19166808761175705, "calibration/coverage@15%": 0.32717660608764054, "calibration/coverage@20%": 0.3968004903159894, "calibration/coverage@25%": 0.4562750224233529, "calibration/coverage@30%": 0.5329890749107863, "calibration/coverage@5%": 0.09618102720540271, "calibration/ece": 0.1232984263006351, "calibration/mean_confidence": 0.44606582673910006, "calibration/prompt_uniqueness": 0.8618793594725546, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 982.8, "completions/max_terminated_length": 697.4, "completions/mean_length": 189.2787109375, "completions/mean_terminated_length": 188.88477478027343, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 0.352, "grad_norm": 0.0011104086879640818, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 367969812.0, "reward": 0.8267355799674988, "reward_std": 0.10339633971452714, "rewards/accuracy_reward": 0.4732421875, "rewards/brier_reward": 0.7793355941772461, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0028707799036055805, "rewards/frontier_coverage_1": 0.13734577894210814, "rewards/frontier_coverage_10": 0.13734577894210814, "rewards/frontier_coverage_15": 0.13734577894210814, "rewards/frontier_coverage_20": 0.13734577894210814, "rewards/frontier_coverage_25": 0.13734577894210814, "rewards/frontier_coverage_5": 0.13734577894210814, "rewards/frontier_ece_reward": 0.01663174610584974, "rewards/frontier_entropy_batch_reward": -0.19108545184135436, "signal/accuracy_reward/centered_abs_mean": 0.10980224609375, "signal/accuracy_reward/group_bin_occupancy": 0.1765625, "signal/accuracy_reward/group_std_mean": 0.14368323981761932, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054901123046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.054901123046875, "signal/advantage_abs_mean": 0.08111239075660706, "signal/advantage_pre_scale_abs_mean": 0.08111239075660706, "signal/advantage_pre_scale_std": 0.1185634657740593, "signal/advantage_std": 0.1185634657740593, "signal/brier_reward/centered_abs_mean": 0.15005984008312226, "signal/brier_reward/group_bin_occupancy": 0.854296875, "signal/brier_reward/group_std_mean": 0.19159983992576599, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018757480010390282, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018757480010390282, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0021185804391279815, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7625, "signal/frontier_aurc_reward/group_std_mean": 0.0031617959029972553, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.792258794419467e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.792258794419467e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.20387679040431977, "signal/frontier_coverage_1/group_bin_occupancy": 0.888671875, "signal/frontier_coverage_1/group_std_mean": 0.2581924706697464, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_10/centered_abs_mean": 0.20387679040431977, "signal/frontier_coverage_10/group_bin_occupancy": 0.888671875, "signal/frontier_coverage_10/group_std_mean": 0.2581924706697464, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_15/centered_abs_mean": 0.20387679040431977, "signal/frontier_coverage_15/group_bin_occupancy": 0.888671875, "signal/frontier_coverage_15/group_std_mean": 0.2581924706697464, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_20/centered_abs_mean": 0.20387679040431977, "signal/frontier_coverage_20/group_bin_occupancy": 0.888671875, "signal/frontier_coverage_20/group_std_mean": 0.2581924706697464, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_25/centered_abs_mean": 0.20387679040431977, "signal/frontier_coverage_25/group_bin_occupancy": 0.888671875, "signal/frontier_coverage_25/group_std_mean": 0.2581924706697464, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_5/centered_abs_mean": 0.20387679040431977, "signal/frontier_coverage_5/group_bin_occupancy": 0.888671875, "signal/frontier_coverage_5/group_std_mean": 0.2581924706697464, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003649394493550062, "signal/frontier_ece_reward/centered_abs_mean": 0.030297876521945, "signal/frontier_ece_reward/group_bin_occupancy": 0.841015625, "signal/frontier_ece_reward/group_std_mean": 0.04242234602570534, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003787234565243125, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003787234565243125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2668303608894348, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744140625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34631708860397337, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03335379511117935, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03335379511117935, "step": 110 }, { "calibration/aurc": 0.3787179681778446, "calibration/batch_distribution_entropy": 0.9810928602094272, "calibration/batch_entropy_100bins": 0.9679847491523969, "calibration/batch_entropy_10bins": 0.9810928602094272, "calibration/batch_entropy_50bins": 0.9765758073335764, "calibration/batch_uniqueness": 0.9527984619140625, "calibration/buffer_distribution_entropy": 0.9977757450664437, "calibration/buffer_entropy_100bins": 0.9878115860267597, "calibration/buffer_entropy_10bins": 0.9977757450664437, "calibration/buffer_entropy_50bins": 0.992834295234476, "calibration/confidence_entropy": 0.4810720543670087, "calibration/coverage@0%": 0.00625, "calibration/coverage@1%": 0.00625, "calibration/coverage@10%": 0.044921875, "calibration/coverage@15%": 0.07109375, "calibration/coverage@20%": 0.137109375, "calibration/coverage@25%": 0.33671875, "calibration/coverage@30%": 0.45859375, "calibration/coverage@5%": 0.0140625, "calibration/ece": 0.141736657577259, "calibration/mean_confidence": 0.5299977859351189, "calibration/prompt_uniqueness": 0.8634765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 894.6, "completions/max_terminated_length": 514.8, "completions/mean_length": 190.746875, "completions/mean_terminated_length": 190.48418273925782, "completions/min_length": 90.2, "completions/min_terminated_length": 90.2, "epoch": 0.368, "grad_norm": 0.0009830680210143328, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 384988532.0, "reward": 0.8328436970710754, "reward_std": 0.10252733081579209, "rewards/accuracy_reward": 0.49130859375, "rewards/brier_reward": 0.774866783618927, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0033453899435698987, "rewards/frontier_coverage_1": 0.1207592561841011, "rewards/frontier_coverage_10": 0.1207592561841011, "rewards/frontier_coverage_15": 0.1207592561841011, "rewards/frontier_coverage_20": 0.1207592561841011, "rewards/frontier_coverage_25": 0.1207592561841011, "rewards/frontier_coverage_5": 0.1207592561841011, "rewards/frontier_ece_reward": 0.015102808736264706, "rewards/frontier_entropy_batch_reward": -0.19416911602020265, "signal/accuracy_reward/centered_abs_mean": 0.107684326171875, "signal/accuracy_reward/group_bin_occupancy": 0.180078125, "signal/accuracy_reward/group_std_mean": 0.14609776586294174, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0538421630859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0538421630859375, "signal/advantage_abs_mean": 0.07987660020589829, "signal/advantage_pre_scale_abs_mean": 0.07987660020589829, "signal/advantage_pre_scale_std": 0.11792214959859848, "signal/advantage_std": 0.11792214959859848, "signal/brier_reward/centered_abs_mean": 0.14661412835121154, "signal/brier_reward/group_bin_occupancy": 0.866796875, "signal/brier_reward/group_std_mean": 0.18781245350837708, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018326766043901443, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.018326766043901443, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028235503938049077, "signal/frontier_aurc_reward/group_bin_occupancy": 0.773046875, "signal/frontier_aurc_reward/group_std_mean": 0.004136141994968057, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.054155117250048e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.054155117250048e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1856350988149643, "signal/frontier_coverage_1/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_1/group_std_mean": 0.23784518837928773, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_10/centered_abs_mean": 0.1856350988149643, "signal/frontier_coverage_10/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_10/group_std_mean": 0.23784518837928773, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_15/centered_abs_mean": 0.1856350988149643, "signal/frontier_coverage_15/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_15/group_std_mean": 0.23784518837928773, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_20/centered_abs_mean": 0.1856350988149643, "signal/frontier_coverage_20/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_20/group_std_mean": 0.23784518837928773, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_25/centered_abs_mean": 0.1856350988149643, "signal/frontier_coverage_25/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_25/group_std_mean": 0.23784518837928773, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_5/centered_abs_mean": 0.1856350988149643, "signal/frontier_coverage_5/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_5/group_std_mean": 0.23784518837928773, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00332286823540926, "signal/frontier_ece_reward/centered_abs_mean": 0.029052532091736794, "signal/frontier_ece_reward/group_bin_occupancy": 0.865234375, "signal/frontier_ece_reward/group_std_mean": 0.03975553885102272, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036315665114670993, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036315665114670993, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2735773980617523, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35312792658805847, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03419717475771904, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03419717475771904, "step": 115 }, { "calibration/aurc": 0.3436694397661129, "calibration/batch_distribution_entropy": 0.9729021229483618, "calibration/batch_entropy_100bins": 0.9666308319358716, "calibration/batch_entropy_10bins": 0.9729021229483618, "calibration/batch_entropy_50bins": 0.9741625269875778, "calibration/batch_uniqueness": 0.9517198658975239, "calibration/buffer_distribution_entropy": 0.9991215376397509, "calibration/buffer_entropy_100bins": 0.9948472735016413, "calibration/buffer_entropy_10bins": 0.9991215376397509, "calibration/buffer_entropy_50bins": 0.9970063375341864, "calibration/confidence_entropy": 0.459402213935696, "calibration/coverage@0%": 0.019553418542074364, "calibration/coverage@1%": 0.019553418542074364, "calibration/coverage@10%": 0.1654216609589041, "calibration/coverage@15%": 0.2588284307729941, "calibration/coverage@20%": 0.295582344667319, "calibration/coverage@25%": 0.34835952788649704, "calibration/coverage@30%": 0.4058150379158512, "calibration/coverage@5%": 0.09621147260273973, "calibration/ece": 0.14823283473979842, "calibration/mean_confidence": 0.46903977021675436, "calibration/prompt_uniqueness": 0.8525091355846774, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1344.8, "completions/max_terminated_length": 674.6, "completions/mean_length": 192.716796875, "completions/mean_terminated_length": 192.05948486328126, "completions/min_length": 96.8, "completions/min_terminated_length": 96.8, "epoch": 0.384, "grad_norm": 0.0008304574876092374, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 401818464.0, "reward": 0.8498589992523193, "reward_std": 0.10006450712680817, "rewards/accuracy_reward": 0.526953125, "rewards/brier_reward": 0.7873589873313904, "rewards/format_reward": 0.99931640625, "rewards/frontier_aurc_reward": -0.003031095629557967, "rewards/frontier_coverage_1": 0.11328938379883766, "rewards/frontier_coverage_10": 0.11328938379883766, "rewards/frontier_coverage_15": 0.11328938379883766, "rewards/frontier_coverage_20": 0.11328938379883766, "rewards/frontier_coverage_25": 0.11328938379883766, "rewards/frontier_coverage_5": 0.11328938379883766, "rewards/frontier_ece_reward": 0.017132452875375747, "rewards/frontier_entropy_batch_reward": -0.2076016277074814, "signal/accuracy_reward/centered_abs_mean": 0.10501708984375, "signal/accuracy_reward/group_bin_occupancy": 0.177734375, "signal/accuracy_reward/group_std_mean": 0.14193961024284363, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052508544921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052508544921875, "signal/advantage_abs_mean": 0.07748562693595887, "signal/advantage_pre_scale_abs_mean": 0.07748562693595887, "signal/advantage_pre_scale_std": 0.11569896936416627, "signal/advantage_std": 0.11569896936416627, "signal/brier_reward/centered_abs_mean": 0.13543253839015962, "signal/brier_reward/group_bin_occupancy": 0.837890625, "signal/brier_reward/group_std_mean": 0.17658950984477997, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016929067298769952, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016929067298769952, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_bin_occupancy": 0.127734375, "signal/format_reward/group_std_mean": 0.003866990143433213, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_aurc_reward/centered_abs_mean": 0.0030818260740488766, "signal/frontier_aurc_reward/group_bin_occupancy": 0.763671875, "signal/frontier_aurc_reward/group_std_mean": 0.004590557329356671, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5164685181807724e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5164685181807724e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17714880108833314, "signal/frontier_coverage_1/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_1/group_std_mean": 0.22984228730201722, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_10/centered_abs_mean": 0.17714880108833314, "signal/frontier_coverage_10/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_10/group_std_mean": 0.22984228730201722, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_15/centered_abs_mean": 0.17714880108833314, "signal/frontier_coverage_15/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_15/group_std_mean": 0.22984228730201722, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_20/centered_abs_mean": 0.17714880108833314, "signal/frontier_coverage_20/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_20/group_std_mean": 0.22984228730201722, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_25/centered_abs_mean": 0.17714880108833314, "signal/frontier_coverage_25/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_25/group_std_mean": 0.22984228730201722, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_5/centered_abs_mean": 0.17714880108833314, "signal/frontier_coverage_5/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_5/group_std_mean": 0.22984228730201722, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031709634698927403, "signal/frontier_ece_reward/centered_abs_mean": 0.026306905224919318, "signal/frontier_ece_reward/group_bin_occupancy": 0.859375, "signal/frontier_ece_reward/group_std_mean": 0.03540766686201095, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032883631531149147, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032883631531149147, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27145218253135683, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34705948233604433, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033931522816419604, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033931522816419604, "step": 120 }, { "calibration/aurc": 0.4289352146594846, "calibration/batch_distribution_entropy": 0.9840787382808802, "calibration/batch_entropy_100bins": 0.9694645908187793, "calibration/batch_entropy_10bins": 0.9840787382808802, "calibration/batch_entropy_50bins": 0.9801606203621676, "calibration/batch_uniqueness": 0.9542595829124529, "calibration/buffer_distribution_entropy": 0.9990103579945624, "calibration/buffer_entropy_100bins": 0.9979487473823087, "calibration/buffer_entropy_10bins": 0.9990103579945624, "calibration/buffer_entropy_50bins": 0.9985370679148613, "calibration/confidence_entropy": 0.4909405142009618, "calibration/coverage@0%": 0.003907014432485323, "calibration/coverage@1%": 0.003907014432485323, "calibration/coverage@10%": 0.003907014432485323, "calibration/coverage@15%": 0.007422639432485323, "calibration/coverage@20%": 0.016407014432485323, "calibration/coverage@25%": 0.10820388943248531, "calibration/coverage@30%": 0.1953132644324853, "calibration/coverage@5%": 0.003907014432485323, "calibration/ece": 0.17591819275681503, "calibration/mean_confidence": 0.5040096555151118, "calibration/prompt_uniqueness": 0.8661961518763007, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1150.2, "completions/max_terminated_length": 636.6, "completions/mean_length": 193.98671875, "completions/mean_terminated_length": 193.59335632324218, "completions/min_length": 97.6, "completions/min_terminated_length": 97.6, "epoch": 0.4, "grad_norm": 0.00110912777017802, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 418841336.0, "reward": 0.8370228409767151, "reward_std": 0.10723992139101028, "rewards/accuracy_reward": 0.50869140625, "rewards/brier_reward": 0.7669232487678528, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.003695770213380456, "rewards/frontier_coverage_1": 0.1053567928262055, "rewards/frontier_coverage_10": 0.1053567928262055, "rewards/frontier_coverage_15": 0.1053567928262055, "rewards/frontier_coverage_20": 0.1053567928262055, "rewards/frontier_coverage_25": 0.1053567928262055, "rewards/frontier_coverage_5": 0.1053567928262055, "rewards/frontier_ece_reward": 0.012649891711771489, "rewards/frontier_entropy_batch_reward": -0.20658698678016663, "signal/accuracy_reward/centered_abs_mean": 0.121722412109375, "signal/accuracy_reward/group_bin_occupancy": 0.180859375, "signal/accuracy_reward/group_std_mean": 0.15902018547058105, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0608612060546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0608612060546875, "signal/advantage_abs_mean": 0.08474465608596801, "signal/advantage_pre_scale_abs_mean": 0.08474465608596801, "signal/advantage_pre_scale_std": 0.12552352696657182, "signal/advantage_std": 0.12552352696657182, "signal/brier_reward/centered_abs_mean": 0.14580391943454743, "signal/brier_reward/group_bin_occupancy": 0.859765625, "signal/brier_reward/group_std_mean": 0.18698894679546357, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01822548992931843, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01822548992931843, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.003800245560705662, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625, "signal/frontier_aurc_reward/group_std_mean": 0.005965401232242584, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.80243938404601e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.80243938404601e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1798011213541031, "signal/frontier_coverage_1/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_1/group_std_mean": 0.23461248278617858, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_10/centered_abs_mean": 0.1798011213541031, "signal/frontier_coverage_10/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_10/group_std_mean": 0.23461248278617858, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_15/centered_abs_mean": 0.1798011213541031, "signal/frontier_coverage_15/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_15/group_std_mean": 0.23461248278617858, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_20/centered_abs_mean": 0.1798011213541031, "signal/frontier_coverage_20/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_20/group_std_mean": 0.23461248278617858, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_25/centered_abs_mean": 0.1798011213541031, "signal/frontier_coverage_25/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_25/group_std_mean": 0.23461248278617858, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_5/centered_abs_mean": 0.1798011213541031, "signal/frontier_coverage_5/group_bin_occupancy": 0.862890625, "signal/frontier_coverage_5/group_std_mean": 0.23461248278617858, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032184398733079433, "signal/frontier_ece_reward/centered_abs_mean": 0.022467482089996337, "signal/frontier_ece_reward/group_bin_occupancy": 0.891015625, "signal/frontier_ece_reward/group_std_mean": 0.02992837503552437, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002808435261249542, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002808435261249542, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2770455002784729, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73203125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35210344195365906, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034630687534809114, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034630687534809114, "step": 125 }, { "calibration/aurc": 0.31638217177441763, "calibration/batch_distribution_entropy": 0.9742986092431437, "calibration/batch_entropy_100bins": 0.9684802374319121, "calibration/batch_entropy_10bins": 0.9742986092431437, "calibration/batch_entropy_50bins": 0.9745079873131764, "calibration/batch_uniqueness": 0.9527449993937459, "calibration/buffer_distribution_entropy": 0.99893326457422, "calibration/buffer_entropy_100bins": 0.9988161195947732, "calibration/buffer_entropy_10bins": 0.99893326457422, "calibration/buffer_entropy_50bins": 0.9989688448398335, "calibration/confidence_entropy": 0.5050498236628885, "calibration/coverage@0%": 0.01171875, "calibration/coverage@1%": 0.01171875, "calibration/coverage@10%": 0.045703125, "calibration/coverage@15%": 0.071484375, "calibration/coverage@20%": 0.15277641878669276, "calibration/coverage@25%": 0.27740566903131114, "calibration/coverage@30%": 0.4689112952544031, "calibration/coverage@5%": 0.014453125, "calibration/ece": 0.09948484508211022, "calibration/mean_confidence": 0.5181539975937481, "calibration/prompt_uniqueness": 0.8683413883649844, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 1142.0, "completions/max_terminated_length": 758.0, "completions/mean_length": 196.4564453125, "completions/mean_terminated_length": 195.80313110351562, "completions/min_length": 101.2, "completions/min_terminated_length": 101.2, "epoch": 0.416, "grad_norm": 0.0009177210740745068, "learning_rate": 1e-06, "loss": 0.0013, "num_tokens": 435734234.0, "reward": 0.845232892036438, "reward_std": 0.10218746364116668, "rewards/accuracy_reward": 0.5146484375, "rewards/brier_reward": 0.7825942277908325, "rewards/format_reward": 0.99951171875, "rewards/frontier_aurc_reward": -0.0030067469459027054, "rewards/frontier_coverage_1": 0.10945327430963517, "rewards/frontier_coverage_10": 0.10945327430963517, "rewards/frontier_coverage_15": 0.10945327430963517, "rewards/frontier_coverage_20": 0.10945327430963517, "rewards/frontier_coverage_25": 0.10938042849302292, "rewards/frontier_coverage_5": 0.10945327430963517, "rewards/frontier_ece_reward": 0.012254784442484378, "rewards/frontier_entropy_batch_reward": -0.18322778046131133, "signal/accuracy_reward/centered_abs_mean": 0.111279296875, "signal/accuracy_reward/group_bin_occupancy": 0.177734375, "signal/accuracy_reward/group_std_mean": 0.14699049890041352, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0556396484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0556396484375, "signal/advantage_abs_mean": 0.07910501658916473, "signal/advantage_pre_scale_abs_mean": 0.07910501658916473, "signal/advantage_pre_scale_std": 0.11764014065265656, "signal/advantage_std": 0.11764014065265656, "signal/brier_reward/centered_abs_mean": 0.14102001786231994, "signal/brier_reward/group_bin_occupancy": 0.85859375, "signal/brier_reward/group_std_mean": 0.18009372055530548, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017627502232789992, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.017627502232789992, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_bin_occupancy": 0.126953125, "signal/format_reward/group_std_mean": 0.002762135770171881, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032022904139012097, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73828125, "signal/frontier_aurc_reward/group_std_mean": 0.005255010444670916, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7320995983900504e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7320995983900504e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.18824252784252166, "signal/frontier_coverage_1/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_1/group_std_mean": 0.23771241903305054, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_10/centered_abs_mean": 0.18824252784252166, "signal/frontier_coverage_10/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_10/group_std_mean": 0.23771241903305054, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_15/centered_abs_mean": 0.18824252784252166, "signal/frontier_coverage_15/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_15/group_std_mean": 0.23771241903305054, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_20/centered_abs_mean": 0.18824252784252166, "signal/frontier_coverage_20/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_20/group_std_mean": 0.23771241903305054, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_25/centered_abs_mean": 0.18796592950820923, "signal/frontier_coverage_25/group_bin_occupancy": 0.886328125, "signal/frontier_coverage_25/group_std_mean": 0.23737676739692687, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003364589996635914, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003364589996635914, "signal/frontier_coverage_5/centered_abs_mean": 0.18824252784252166, "signal/frontier_coverage_5/group_bin_occupancy": 0.8859375, "signal/frontier_coverage_5/group_std_mean": 0.23771241903305054, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003369541047140956, "signal/frontier_ece_reward/centered_abs_mean": 0.01897584684193134, "signal/frontier_ece_reward/group_bin_occupancy": 0.905078125, "signal/frontier_ece_reward/group_std_mean": 0.0248013224452734, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023719808552414177, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023719808552414177, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.258677664399147, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3334538578987122, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03233470804989338, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03233470804989338, "step": 130 }, { "calibration/aurc": 0.27110627824789024, "calibration/batch_distribution_entropy": 0.9834757676207486, "calibration/batch_entropy_100bins": 0.9703422906787201, "calibration/batch_entropy_10bins": 0.9834757676207486, "calibration/batch_entropy_50bins": 0.9793241316918639, "calibration/batch_uniqueness": 0.954693603515625, "calibration/buffer_distribution_entropy": 0.9993392738307401, "calibration/buffer_entropy_100bins": 0.9991317727363503, "calibration/buffer_entropy_10bins": 0.9993392738307401, "calibration/buffer_entropy_50bins": 0.9992819152374992, "calibration/confidence_entropy": 0.47524143822844167, "calibration/coverage@0%": 0.016796875, "calibration/coverage@1%": 0.016796875, "calibration/coverage@10%": 0.074609375, "calibration/coverage@15%": 0.255078125, "calibration/coverage@20%": 0.341796875, "calibration/coverage@25%": 0.423828125, "calibration/coverage@30%": 0.580078125, "calibration/coverage@5%": 0.021484375, "calibration/ece": 0.11594602895950101, "calibration/mean_confidence": 0.5297345627242726, "calibration/prompt_uniqueness": 0.85546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 903.6, "completions/max_terminated_length": 518.6, "completions/mean_length": 197.63076171875, "completions/mean_terminated_length": 197.3692840576172, "completions/min_length": 99.8, "completions/min_terminated_length": 99.8, "epoch": 0.432, "grad_norm": 0.0010740357683971524, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 452772309.0, "reward": 0.8608399510383606, "reward_std": 0.09858821481466293, "rewards/accuracy_reward": 0.54716796875, "rewards/brier_reward": 0.7949665904045105, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002391556603834033, "rewards/frontier_coverage_1": 0.10390491709113121, "rewards/frontier_coverage_10": 0.10390475168824195, "rewards/frontier_coverage_15": 0.10390453487634659, "rewards/frontier_coverage_20": 0.10388994812965394, "rewards/frontier_coverage_25": 0.10309707075357437, "rewards/frontier_coverage_5": 0.10390491709113121, "rewards/frontier_ece_reward": 0.012549491226673126, "rewards/frontier_entropy_batch_reward": -0.1975017488002777, "signal/accuracy_reward/centered_abs_mean": 0.111602783203125, "signal/accuracy_reward/group_bin_occupancy": 0.17734375, "signal/accuracy_reward/group_std_mean": 0.14663170725107194, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0558013916015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0558013916015625, "signal/advantage_abs_mean": 0.07775494158267975, "signal/advantage_pre_scale_abs_mean": 0.07775494158267975, "signal/advantage_pre_scale_std": 0.11521324068307877, "signal/advantage_std": 0.11521324068307877, "signal/brier_reward/centered_abs_mean": 0.1291389599442482, "signal/brier_reward/group_bin_occupancy": 0.85390625, "signal/brier_reward/group_std_mean": 0.16580995321273803, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016142369993031026, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016142369993031026, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027791480533778667, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73203125, "signal/frontier_aurc_reward/group_std_mean": 0.004389007203280925, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.974674666300416e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.974674666300416e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1781601697206497, "signal/frontier_coverage_1/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_1/group_std_mean": 0.22925682067871095, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003189067030325532, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003189067030325532, "signal/frontier_coverage_10/centered_abs_mean": 0.1781599998474121, "signal/frontier_coverage_10/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_10/group_std_mean": 0.22925659418106079, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003189063956961036, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003189063956961036, "signal/frontier_coverage_15/centered_abs_mean": 0.1781597375869751, "signal/frontier_coverage_15/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_15/group_std_mean": 0.22925626039505004, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031890592537820337, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031890592537820337, "signal/frontier_coverage_20/centered_abs_mean": 0.17814434170722962, "signal/frontier_coverage_20/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_20/group_std_mean": 0.22923634946346283, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031887838151305912, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031887838151305912, "signal/frontier_coverage_25/centered_abs_mean": 0.1761375993490219, "signal/frontier_coverage_25/group_bin_occupancy": 0.871875, "signal/frontier_coverage_25/group_std_mean": 0.2266537368297577, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031528628896921873, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031528628896921873, "signal/frontier_coverage_5/centered_abs_mean": 0.1781601697206497, "signal/frontier_coverage_5/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_5/group_std_mean": 0.22925682067871095, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003189067030325532, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003189067030325532, "signal/frontier_ece_reward/centered_abs_mean": 0.016893037036061286, "signal/frontier_ece_reward/group_bin_occupancy": 0.90234375, "signal/frontier_ece_reward/group_std_mean": 0.02187432684004307, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021116296295076607, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021116296295076607, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2685338854789734, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7390625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3456457793712616, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033566735684871674, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033566735684871674, "step": 135 }, { "calibration/aurc": 0.28285148846655567, "calibration/batch_distribution_entropy": 0.9756966652094482, "calibration/batch_entropy_100bins": 0.9671782953324402, "calibration/batch_entropy_10bins": 0.9756966652094482, "calibration/batch_entropy_50bins": 0.974769557863015, "calibration/batch_uniqueness": 0.9531880721897764, "calibration/buffer_distribution_entropy": 0.9993609676432893, "calibration/buffer_entropy_100bins": 0.9991891999147186, "calibration/buffer_entropy_10bins": 0.9993609676432893, "calibration/buffer_entropy_50bins": 0.999319530042054, "calibration/confidence_entropy": 0.5029605620968143, "calibration/coverage@0%": 0.025391389432485322, "calibration/coverage@1%": 0.025391389432485322, "calibration/coverage@10%": 0.08907167318982387, "calibration/coverage@15%": 0.15821459148727984, "calibration/coverage@20%": 0.23907473091976517, "calibration/coverage@25%": 0.3297272504892368, "calibration/coverage@30%": 0.5098833476027397, "calibration/coverage@5%": 0.04375076443248532, "calibration/ece": 0.11815471566014837, "calibration/mean_confidence": 0.5428597595536488, "calibration/prompt_uniqueness": 0.867127087262617, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 1115.8, "completions/max_terminated_length": 790.0, "completions/mean_length": 203.92919921875, "completions/mean_terminated_length": 203.5388946533203, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 0.448, "grad_norm": 0.0009459942230023444, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 469813344.0, "reward": 0.8468173861503601, "reward_std": 0.09660987108945847, "rewards/accuracy_reward": 0.51064453125, "rewards/brier_reward": 0.7961806297302246, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002907540462911129, "rewards/frontier_coverage_1": 0.12633958905935289, "rewards/frontier_coverage_10": 0.12633958905935289, "rewards/frontier_coverage_15": 0.12633958905935289, "rewards/frontier_coverage_20": 0.1263102501630783, "rewards/frontier_coverage_25": 0.1250537723302841, "rewards/frontier_coverage_5": 0.12633958905935289, "rewards/frontier_ece_reward": 0.010841607302427291, "rewards/frontier_entropy_batch_reward": -0.1818355828523636, "signal/accuracy_reward/centered_abs_mean": 0.104315185546875, "signal/accuracy_reward/group_bin_occupancy": 0.176171875, "signal/accuracy_reward/group_std_mean": 0.1402893543243408, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521575927734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0521575927734375, "signal/advantage_abs_mean": 0.07476266324520112, "signal/advantage_pre_scale_abs_mean": 0.07476266324520112, "signal/advantage_pre_scale_std": 0.11200970113277435, "signal/advantage_std": 0.11200970113277435, "signal/brier_reward/centered_abs_mean": 0.12935363054275512, "signal/brier_reward/group_bin_occupancy": 0.855078125, "signal/brier_reward/group_std_mean": 0.16653329730033875, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01616920381784439, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01616920381784439, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002982544107362628, "signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375, "signal/frontier_aurc_reward/group_std_mean": 0.004976610559970141, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3387537627713753e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3387537627713753e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17772595584392548, "signal/frontier_coverage_1/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_1/group_std_mean": 0.22692298889160156, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031812945380806923, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031812945380806923, "signal/frontier_coverage_10/centered_abs_mean": 0.17772595584392548, "signal/frontier_coverage_10/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_10/group_std_mean": 0.22692298889160156, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031812945380806923, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031812945380806923, "signal/frontier_coverage_15/centered_abs_mean": 0.17772595584392548, "signal/frontier_coverage_15/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_15/group_std_mean": 0.22692298889160156, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031812945380806923, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031812945380806923, "signal/frontier_coverage_20/centered_abs_mean": 0.1776350975036621, "signal/frontier_coverage_20/group_bin_occupancy": 0.876171875, "signal/frontier_coverage_20/group_std_mean": 0.22681189179420472, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031796682626008986, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031796682626008986, "signal/frontier_coverage_25/centered_abs_mean": 0.17426885068416595, "signal/frontier_coverage_25/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_25/group_std_mean": 0.22260749340057373, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003119412390515208, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003119412390515208, "signal/frontier_coverage_5/centered_abs_mean": 0.17772595584392548, "signal/frontier_coverage_5/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_5/group_std_mean": 0.22692298889160156, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031812945380806923, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031812945380806923, "signal/frontier_ece_reward/centered_abs_mean": 0.015317396074533463, "signal/frontier_ece_reward/group_bin_occupancy": 0.89140625, "signal/frontier_ece_reward/group_std_mean": 0.0201519463211298, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001914674509316683, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001914674509316683, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2546141266822815, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.742578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.32758485078811644, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03182676583528519, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03182676583528519, "step": 140 }, { "calibration/aurc": 0.3984116060553997, "calibration/batch_distribution_entropy": 0.9822031101518748, "calibration/batch_entropy_100bins": 0.9714458118149061, "calibration/batch_entropy_10bins": 0.9822031101518748, "calibration/batch_entropy_50bins": 0.9794551763094261, "calibration/batch_uniqueness": 0.9540252685546875, "calibration/buffer_distribution_entropy": 0.9991268812264866, "calibration/buffer_entropy_100bins": 0.9990936316751196, "calibration/buffer_entropy_10bins": 0.9991268812264866, "calibration/buffer_entropy_50bins": 0.9991993587888806, "calibration/confidence_entropy": 0.5101373667560415, "calibration/coverage@0%": 0.01171875, "calibration/coverage@1%": 0.01171875, "calibration/coverage@10%": 0.015625, "calibration/coverage@15%": 0.019140625, "calibration/coverage@20%": 0.1140625, "calibration/coverage@25%": 0.18828125, "calibration/coverage@30%": 0.283203125, "calibration/coverage@5%": 0.01171875, "calibration/ece": 0.12294057415090998, "calibration/mean_confidence": 0.4963258028783944, "calibration/prompt_uniqueness": 0.85458984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 1060.8, "completions/max_terminated_length": 660.8, "completions/mean_length": 204.21494140625, "completions/mean_terminated_length": 203.95458374023437, "completions/min_length": 107.8, "completions/min_terminated_length": 107.8, "epoch": 0.464, "grad_norm": 0.0008858161745592952, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 487075321.0, "reward": 0.8189481854438782, "reward_std": 0.09576210975646973, "rewards/accuracy_reward": 0.4634765625, "rewards/brier_reward": 0.7746347069740296, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003687644097954035, "rewards/frontier_coverage_1": 0.14042913317680358, "rewards/frontier_coverage_10": 0.14042913317680358, "rewards/frontier_coverage_15": 0.14042913317680358, "rewards/frontier_coverage_20": 0.1404130145907402, "rewards/frontier_coverage_25": 0.13810611069202422, "rewards/frontier_coverage_5": 0.14042913317680358, "rewards/frontier_ece_reward": 0.008142163883894682, "rewards/frontier_entropy_batch_reward": -0.20411013662815095, "signal/accuracy_reward/centered_abs_mean": 0.09339599609375, "signal/accuracy_reward/group_bin_occupancy": 0.176171875, "signal/accuracy_reward/group_std_mean": 0.13170869201421737, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046697998046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.046697998046875, "signal/advantage_abs_mean": 0.07327512502670289, "signal/advantage_pre_scale_abs_mean": 0.07327512502670289, "signal/advantage_pre_scale_std": 0.11063500344753266, "signal/advantage_std": 0.11063500344753266, "signal/brier_reward/centered_abs_mean": 0.1346296638250351, "signal/brier_reward/group_bin_occupancy": 0.84765625, "signal/brier_reward/group_std_mean": 0.17363184988498687, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01682870797812939, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01682870797812939, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0032396471593528985, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71796875, "signal/frontier_aurc_reward/group_std_mean": 0.005168183147907257, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.798968268209137e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.798968268209137e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17095789611339568, "signal/frontier_coverage_1/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_1/group_std_mean": 0.22138096988201142, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00306014628149569, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00306014628149569, "signal/frontier_coverage_10/centered_abs_mean": 0.17095789611339568, "signal/frontier_coverage_10/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_10/group_std_mean": 0.22138096988201142, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00306014628149569, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00306014628149569, "signal/frontier_coverage_15/centered_abs_mean": 0.17095789611339568, "signal/frontier_coverage_15/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_15/group_std_mean": 0.22138096988201142, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00306014628149569, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00306014628149569, "signal/frontier_coverage_20/centered_abs_mean": 0.17093894481658936, "signal/frontier_coverage_20/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_20/group_std_mean": 0.2213562995195389, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030598069541156294, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030598069541156294, "signal/frontier_coverage_25/centered_abs_mean": 0.16712769567966462, "signal/frontier_coverage_25/group_bin_occupancy": 0.878125, "signal/frontier_coverage_25/group_std_mean": 0.2164506733417511, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029915857128798963, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029915857128798963, "signal/frontier_coverage_5/centered_abs_mean": 0.17095789611339568, "signal/frontier_coverage_5/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_5/group_std_mean": 0.22138096988201142, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00306014628149569, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00306014628149569, "signal/frontier_ece_reward/centered_abs_mean": 0.0138343783095479, "signal/frontier_ece_reward/group_bin_occupancy": 0.886328125, "signal/frontier_ece_reward/group_std_mean": 0.01835048608481884, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017292972886934876, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017292972886934876, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27222808003425597, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34940491914749144, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034028510004281996, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034028510004281996, "step": 145 }, { "calibration/aurc": 0.2832759600511364, "calibration/batch_distribution_entropy": 0.9715486304314798, "calibration/batch_entropy_100bins": 0.9645395963303353, "calibration/batch_entropy_10bins": 0.9715486304314798, "calibration/batch_entropy_50bins": 0.9721231779983697, "calibration/batch_uniqueness": 0.9513397216796875, "calibration/buffer_distribution_entropy": 0.9989767926105942, "calibration/buffer_entropy_100bins": 0.9990000341778547, "calibration/buffer_entropy_10bins": 0.9989767926105942, "calibration/buffer_entropy_50bins": 0.9990904829689274, "calibration/confidence_entropy": 0.480528899956653, "calibration/coverage@0%": 0.009765625, "calibration/coverage@1%": 0.009765625, "calibration/coverage@10%": 0.134375, "calibration/coverage@15%": 0.225, "calibration/coverage@20%": 0.310546875, "calibration/coverage@25%": 0.396484375, "calibration/coverage@30%": 0.5953125, "calibration/coverage@5%": 0.026953125, "calibration/ece": 0.14768165181713427, "calibration/mean_confidence": 0.49541220339789815, "calibration/prompt_uniqueness": 0.85048828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 964.8, "completions/max_terminated_length": 635.2, "completions/mean_length": 199.683984375, "completions/mean_terminated_length": 199.29322509765626, "completions/min_length": 102.6, "completions/min_terminated_length": 102.6, "epoch": 0.48, "grad_norm": 0.00261081475764513, "learning_rate": 1e-06, "loss": 0.001, "num_tokens": 504168117.0, "reward": 0.8438061833381653, "reward_std": 0.09672794342041016, "rewards/accuracy_reward": 0.51884765625, "rewards/brier_reward": 0.7764919996261597, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.003090168023481965, "rewards/frontier_coverage_1": 0.11306091845035553, "rewards/frontier_coverage_10": 0.11306091845035553, "rewards/frontier_coverage_15": 0.11306091845035553, "rewards/frontier_coverage_20": 0.11289113312959671, "rewards/frontier_coverage_25": 0.11131031811237335, "rewards/frontier_coverage_5": 0.11306091845035553, "rewards/frontier_ece_reward": 0.008230427093803883, "rewards/frontier_entropy_batch_reward": -0.20491617918014526, "signal/accuracy_reward/centered_abs_mean": 0.117669677734375, "signal/accuracy_reward/group_bin_occupancy": 0.1796875, "signal/accuracy_reward/group_std_mean": 0.15455419719219207, "signal/accuracy_reward/group_zero_std_frac": 0.5625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0588348388671875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0588348388671875, "signal/advantage_abs_mean": 0.07520890831947327, "signal/advantage_pre_scale_abs_mean": 0.07520890831947327, "signal/advantage_pre_scale_std": 0.11293750852346421, "signal/advantage_std": 0.11293750852346421, "signal/brier_reward/centered_abs_mean": 0.1358100563287735, "signal/brier_reward/group_bin_occupancy": 0.8375, "signal/brier_reward/group_std_mean": 0.1739418923854828, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016976257041096687, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016976257041096687, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029037161730229855, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7359375, "signal/frontier_aurc_reward/group_std_mean": 0.0047456233762204645, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1976518443552776e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1976518443552776e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.19191361963748932, "signal/frontier_coverage_1/group_bin_occupancy": 0.855859375, "signal/frontier_coverage_1/group_std_mean": 0.24454809129238128, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034352536778897045, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034352536778897045, "signal/frontier_coverage_10/centered_abs_mean": 0.19191361963748932, "signal/frontier_coverage_10/group_bin_occupancy": 0.855859375, "signal/frontier_coverage_10/group_std_mean": 0.24454809129238128, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034352536778897045, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034352536778897045, "signal/frontier_coverage_15/centered_abs_mean": 0.19191361963748932, "signal/frontier_coverage_15/group_bin_occupancy": 0.855859375, "signal/frontier_coverage_15/group_std_mean": 0.24454809129238128, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034352536778897045, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034352536778897045, "signal/frontier_coverage_20/centered_abs_mean": 0.19139576852321624, "signal/frontier_coverage_20/group_bin_occupancy": 0.85625, "signal/frontier_coverage_20/group_std_mean": 0.24390313625335694, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003425984038040042, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003425984038040042, "signal/frontier_coverage_25/centered_abs_mean": 0.18600209653377534, "signal/frontier_coverage_25/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_25/group_std_mean": 0.23725315928459167, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033294373657554387, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033294373657554387, "signal/frontier_coverage_5/centered_abs_mean": 0.19191361963748932, "signal/frontier_coverage_5/group_bin_occupancy": 0.855859375, "signal/frontier_coverage_5/group_std_mean": 0.24454809129238128, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034352536778897045, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034352536778897045, "signal/frontier_ece_reward/centered_abs_mean": 0.013514818623661995, "signal/frontier_ece_reward/group_bin_occupancy": 0.901171875, "signal/frontier_ece_reward/group_std_mean": 0.017669208720326422, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016893523279577494, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016893523279577494, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26671458780765533, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34039146900177003, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03333932347595692, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333932347595692, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.5405549587051462, "eval_calibration/batch_distribution_entropy": 0.9065406036093964, "eval_calibration/batch_entropy_100bins": 0.6972847745692209, "eval_calibration/batch_entropy_10bins": 0.9065406036093964, "eval_calibration/batch_entropy_50bins": 0.7754914476517584, "eval_calibration/batch_uniqueness": 0.89453125, "eval_calibration/buffer_distribution_entropy": 0.9988051667639347, "eval_calibration/buffer_entropy_100bins": 0.9988938324367392, "eval_calibration/buffer_entropy_10bins": 0.9988051667639347, "eval_calibration/buffer_entropy_50bins": 0.9989811313318033, "eval_calibration/confidence_entropy": 0.48391809012155884, "eval_calibration/coverage@0%": 0.0390625, "eval_calibration/coverage@1%": 0.0390625, "eval_calibration/coverage@10%": 0.0390625, "eval_calibration/coverage@15%": 0.0390625, "eval_calibration/coverage@20%": 0.046875, "eval_calibration/coverage@25%": 0.046875, "eval_calibration/coverage@30%": 0.046875, "eval_calibration/coverage@5%": 0.0390625, "eval_calibration/ece": 0.2232385876510525, "eval_calibration/mean_confidence": 0.4669556942027052, "eval_calibration/prompt_uniqueness": 0.89453125, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 387.0, "eval_completions/max_terminated_length": 387.0, "eval_completions/mean_length": 198.34226989746094, "eval_completions/mean_terminated_length": 198.34226989746094, "eval_completions/min_length": 125.0, "eval_completions/min_terminated_length": 125.0, "eval_loss": 0.0, "eval_num_tokens": 504168117.0, "eval_reward": 0.699365958571434, "eval_reward_std": 0.21834751963615417, "eval_rewards/accuracy_reward": 0.412109375, "eval_rewards/brier_reward": 0.7784133553504944, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.004543848393950611, "eval_rewards/frontier_coverage_1": 0.18927186354994774, "eval_rewards/frontier_coverage_10": 0.18927159160375595, "eval_rewards/frontier_coverage_15": 0.18926333636045456, "eval_rewards/frontier_coverage_20": 0.18883728608489037, "eval_rewards/frontier_coverage_25": 0.17392469197511673, "eval_rewards/frontier_coverage_5": 0.18927186354994774, "eval_rewards/frontier_ece_reward": 0.00836634065490216, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 19.6978, "eval_samples_per_second": 25.384, "eval_signal/accuracy_reward/centered_abs_mean": 0.4649658203125, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4890812262892723, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23248291015625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23248291015625, "eval_signal/advantage_abs_mean": 0.19869232177734375, "eval_signal/advantage_pre_scale_abs_mean": 0.19869232177734375, "eval_signal/advantage_pre_scale_std": 0.2162884622812271, "eval_signal/advantage_std": 0.2162884622812271, "eval_signal/brier_reward/centered_abs_mean": 0.20233283191919327, "eval_signal/brier_reward/group_bin_occupancy": 0.9140625, "eval_signal/brier_reward/group_std_mean": 0.2505420297384262, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02529160398989916, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02529160398989916, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005963263858575374, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.78125, "eval_signal/frontier_aurc_reward/group_std_mean": 0.010063497698865831, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010674241821106989, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010674241821106989, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.36536306887865067, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_1/group_std_mean": 0.4483560249209404, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00653999880887568, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00653999880887568, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3653620555996895, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_10/group_std_mean": 0.4483548328280449, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0065399802988395095, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0065399802988395095, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.365331307053566, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_15/group_std_mean": 0.448319248855114, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0065394300036132336, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0065394300036132336, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.36427226662635803, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_20/group_std_mean": 0.4470879137516022, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006520473049022257, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006520473049022257, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.33615638315677643, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_25/group_std_mean": 0.4133630245923996, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006017199018970132, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006017199018970132, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.36536306887865067, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625, "eval_signal/frontier_coverage_5/group_std_mean": 0.4483560249209404, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00653999880887568, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00653999880887568, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.01719106500968337, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9296875, "eval_signal/frontier_ece_reward/group_std_mean": 0.022434783168137074, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021488831262104213, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021488831262104213, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.203, "step": 150 }, { "calibration/aurc": 0.3869300543313942, "calibration/batch_distribution_entropy": 0.9808689690281369, "calibration/batch_entropy_100bins": 0.9674042155001947, "calibration/batch_entropy_10bins": 0.9808689690281369, "calibration/batch_entropy_50bins": 0.976446592128552, "calibration/batch_uniqueness": 0.9534576416015625, "calibration/buffer_distribution_entropy": 0.9986828299800502, "calibration/buffer_entropy_100bins": 0.9988154597442325, "calibration/buffer_entropy_10bins": 0.9986828299800502, "calibration/buffer_entropy_50bins": 0.9988999650475596, "calibration/confidence_entropy": 0.4790370333140571, "calibration/coverage@0%": 0.023828125, "calibration/coverage@1%": 0.023828125, "calibration/coverage@10%": 0.09765625, "calibration/coverage@15%": 0.1453125, "calibration/coverage@20%": 0.20390625, "calibration/coverage@25%": 0.250390625, "calibration/coverage@30%": 0.290625, "calibration/coverage@5%": 0.053515625, "calibration/ece": 0.13844372663002122, "calibration/mean_confidence": 0.5048848300838907, "calibration/prompt_uniqueness": 0.851220703125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 460.8, "completions/max_terminated_length": 460.8, "completions/mean_length": 195.812890625, "completions/mean_terminated_length": 195.812890625, "completions/min_length": 98.6, "completions/min_terminated_length": 98.6, "epoch": 0.496, "grad_norm": 0.0008509118924848735, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 521481081.0, "reward": 0.8582545042037963, "reward_std": 0.09380114525556564, "rewards/accuracy_reward": 0.5439453125, "rewards/brier_reward": 0.7857403755187988, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.003081470658071339, "rewards/frontier_coverage_1": 0.10034325867891311, "rewards/frontier_coverage_10": 0.10034310221672058, "rewards/frontier_coverage_15": 0.10032327324151993, "rewards/frontier_coverage_20": 0.10001767575740814, "rewards/frontier_coverage_25": 0.09081147015094757, "rewards/frontier_coverage_5": 0.10034325867891311, "rewards/frontier_ece_reward": 0.007945819105952979, "rewards/frontier_entropy_batch_reward": -0.18739983737468718, "signal/accuracy_reward/centered_abs_mean": 0.0947265625, "signal/accuracy_reward/group_bin_occupancy": 0.17421875, "signal/accuracy_reward/group_std_mean": 0.13038647025823594, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04736328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04736328125, "signal/advantage_abs_mean": 0.07237804681062698, "signal/advantage_pre_scale_abs_mean": 0.07237804681062698, "signal/advantage_pre_scale_std": 0.10942972600460052, "signal/advantage_std": 0.10942972600460052, "signal/brier_reward/centered_abs_mean": 0.12652941197156906, "signal/brier_reward/group_bin_occupancy": 0.849609375, "signal/brier_reward/group_std_mean": 0.1622232437133789, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015816176496446132, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015816176496446132, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.003096145251765847, "signal/frontier_aurc_reward/group_bin_occupancy": 0.718359375, "signal/frontier_aurc_reward/group_std_mean": 0.005016565602272749, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.542099897866137e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.542099897866137e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1638656437397003, "signal/frontier_coverage_1/group_bin_occupancy": 0.86875, "signal/frontier_coverage_1/group_std_mean": 0.21250625550746918, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002933195047080517, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002933195047080517, "signal/frontier_coverage_10/centered_abs_mean": 0.16386164724826813, "signal/frontier_coverage_10/group_bin_occupancy": 0.86875, "signal/frontier_coverage_10/group_std_mean": 0.21250071823596955, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029331233818084, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029331233818084, "signal/frontier_coverage_15/centered_abs_mean": 0.16379604637622833, "signal/frontier_coverage_15/group_bin_occupancy": 0.86875, "signal/frontier_coverage_15/group_std_mean": 0.21241317689418793, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029319490771740676, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029319490771740676, "signal/frontier_coverage_20/centered_abs_mean": 0.1630973845720291, "signal/frontier_coverage_20/group_bin_occupancy": 0.867578125, "signal/frontier_coverage_20/group_std_mean": 0.2114973783493042, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002919443091377616, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002919443091377616, "signal/frontier_coverage_25/centered_abs_mean": 0.1438317209482193, "signal/frontier_coverage_25/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_25/group_std_mean": 0.18710974752902984, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00257458770647645, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00257458770647645, "signal/frontier_coverage_5/centered_abs_mean": 0.1638656437397003, "signal/frontier_coverage_5/group_bin_occupancy": 0.86875, "signal/frontier_coverage_5/group_std_mean": 0.21250625550746918, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002933195047080517, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002933195047080517, "signal/frontier_ece_reward/centered_abs_mean": 0.011986837163567543, "signal/frontier_ece_reward/group_bin_occupancy": 0.898046875, "signal/frontier_ece_reward/group_std_mean": 0.015694568678736687, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001498354645445943, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001498354645445943, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26680760979652407, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34285420179367065, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03335095122456551, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03335095122456551, "step": 155 }, { "calibration/aurc": 0.31852894466439724, "calibration/batch_distribution_entropy": 0.9846605362740333, "calibration/batch_entropy_100bins": 0.9731097753973357, "calibration/batch_entropy_10bins": 0.9846605362740333, "calibration/batch_entropy_50bins": 0.9797415936230249, "calibration/batch_uniqueness": 0.954290771484375, "calibration/buffer_distribution_entropy": 0.9985237641268538, "calibration/buffer_entropy_100bins": 0.9987331459546469, "calibration/buffer_entropy_10bins": 0.9985237641268538, "calibration/buffer_entropy_50bins": 0.9987960418931671, "calibration/confidence_entropy": 0.5095141511134974, "calibration/coverage@0%": 0.012890625, "calibration/coverage@1%": 0.012890625, "calibration/coverage@10%": 0.176953125, "calibration/coverage@15%": 0.29765625, "calibration/coverage@20%": 0.383203125, "calibration/coverage@25%": 0.45546875, "calibration/coverage@30%": 0.49453125, "calibration/coverage@5%": 0.069140625, "calibration/ece": 0.1362378664495895, "calibration/mean_confidence": 0.5084870717219879, "calibration/prompt_uniqueness": 0.86162109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 896.2, "completions/max_terminated_length": 459.0, "completions/mean_length": 189.36025390625, "completions/mean_terminated_length": 189.0965362548828, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 0.512, "grad_norm": 0.00109315593726933, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 538565794.0, "reward": 0.8589414358139038, "reward_std": 0.09629883468151093, "rewards/accuracy_reward": 0.5396484375, "rewards/brier_reward": 0.8017237544059753, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002702287444844842, "rewards/frontier_coverage_1": 0.11047709956765175, "rewards/frontier_coverage_10": 0.11047184318304062, "rewards/frontier_coverage_15": 0.11041708588600159, "rewards/frontier_coverage_20": 0.10978015959262848, "rewards/frontier_coverage_25": 0.09341206625103951, "rewards/frontier_coverage_5": 0.11047709956765175, "rewards/frontier_ece_reward": 0.008117536641657352, "rewards/frontier_entropy_batch_reward": -0.18771363496780397, "signal/accuracy_reward/centered_abs_mean": 0.1011474609375, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.13669176101684571, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05057373046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05057373046875, "signal/advantage_abs_mean": 0.07451938837766647, "signal/advantage_pre_scale_abs_mean": 0.07451938837766647, "signal/advantage_pre_scale_std": 0.11410035341978073, "signal/advantage_std": 0.11410035341978073, "signal/brier_reward/centered_abs_mean": 0.12140367329120635, "signal/brier_reward/group_bin_occupancy": 0.844921875, "signal/brier_reward/group_std_mean": 0.15884515047073364, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015175459161400794, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015175459161400794, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002908071083948016, "signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875, "signal/frontier_aurc_reward/group_std_mean": 0.00480275945737958, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.205447159823961e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.205447159823961e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15683222711086273, "signal/frontier_coverage_1/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_1/group_std_mean": 0.20588673055171966, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028072968125343323, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028072968125343323, "signal/frontier_coverage_10/centered_abs_mean": 0.15682255029678344, "signal/frontier_coverage_10/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_10/group_std_mean": 0.20587407648563386, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028071236796677113, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028071236796677113, "signal/frontier_coverage_15/centered_abs_mean": 0.15671851933002473, "signal/frontier_coverage_15/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_15/group_std_mean": 0.20573811531066893, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002805261267349124, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002805261267349124, "signal/frontier_coverage_20/centered_abs_mean": 0.15555653274059295, "signal/frontier_coverage_20/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_20/group_std_mean": 0.20422441959381105, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027844619005918505, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027844619005918505, "signal/frontier_coverage_25/centered_abs_mean": 0.1274886041879654, "signal/frontier_coverage_25/group_bin_occupancy": 0.85703125, "signal/frontier_coverage_25/group_std_mean": 0.16840324103832244, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022820457816123962, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022820457816123962, "signal/frontier_coverage_5/centered_abs_mean": 0.15683222711086273, "signal/frontier_coverage_5/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_5/group_std_mean": 0.20588673055171966, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028072968125343323, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028072968125343323, "signal/frontier_ece_reward/centered_abs_mean": 0.010539719834923744, "signal/frontier_ece_reward/group_bin_occupancy": 0.899609375, "signal/frontier_ece_reward/group_std_mean": 0.013909543678164483, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001317464979365468, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001317464979365468, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2608456969261169, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7421875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33714223504066465, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032605712115764615, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032605712115764615, "step": 160 }, { "calibration/aurc": 0.21917538463284222, "calibration/batch_distribution_entropy": 0.9878817709654515, "calibration/batch_entropy_100bins": 0.9757889859436031, "calibration/batch_entropy_10bins": 0.9878817709654515, "calibration/batch_entropy_50bins": 0.983224028911913, "calibration/batch_uniqueness": 0.9545867919921875, "calibration/buffer_distribution_entropy": 0.9985031862617003, "calibration/buffer_entropy_100bins": 0.9987566386866493, "calibration/buffer_entropy_10bins": 0.9985031862617003, "calibration/buffer_entropy_50bins": 0.9987999633528707, "calibration/confidence_entropy": 0.4950461976985136, "calibration/coverage@0%": 0.0078125, "calibration/coverage@1%": 0.0078125, "calibration/coverage@10%": 0.25234375, "calibration/coverage@15%": 0.36328125, "calibration/coverage@20%": 0.51875, "calibration/coverage@25%": 0.65703125, "calibration/coverage@30%": 0.7484375, "calibration/coverage@5%": 0.141796875, "calibration/ece": 0.12159026779133557, "calibration/mean_confidence": 0.5102481512964239, "calibration/prompt_uniqueness": 0.847412109375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 686.2, "completions/max_terminated_length": 686.2, "completions/mean_length": 187.4068359375, "completions/mean_terminated_length": 187.4068359375, "completions/min_length": 93.4, "completions/min_terminated_length": 93.4, "epoch": 0.528, "grad_norm": 0.0009646462858654559, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 555514376.0, "reward": 0.8567448496818543, "reward_std": 0.0943350225687027, "rewards/accuracy_reward": 0.53310546875, "rewards/brier_reward": 0.8055103659629822, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0025082074804231524, "rewards/frontier_coverage_1": 0.1262974977493286, "rewards/frontier_coverage_10": 0.12625774666666983, "rewards/frontier_coverage_15": 0.1262197308242321, "rewards/frontier_coverage_20": 0.12498710155487061, "rewards/frontier_coverage_25": 0.09902235716581345, "rewards/frontier_coverage_5": 0.1262974977493286, "rewards/frontier_ece_reward": 0.006973131839185953, "rewards/frontier_entropy_batch_reward": -0.19499198198318482, "signal/accuracy_reward/centered_abs_mean": 0.108697509765625, "signal/accuracy_reward/group_bin_occupancy": 0.175, "signal/accuracy_reward/group_std_mean": 0.1418474718928337, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0543487548828125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0543487548828125, "signal/advantage_abs_mean": 0.07427967190742493, "signal/advantage_pre_scale_abs_mean": 0.07427967190742493, "signal/advantage_pre_scale_std": 0.1111309289932251, "signal/advantage_std": 0.1111309289932251, "signal/brier_reward/centered_abs_mean": 0.11962604224681854, "signal/brier_reward/group_bin_occupancy": 0.844921875, "signal/brier_reward/group_std_mean": 0.1540861427783966, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014953255280852317, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014953255280852317, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002598578087054193, "signal/frontier_aurc_reward/group_bin_occupancy": 0.696484375, "signal/frontier_aurc_reward/group_std_mean": 0.004507921310141683, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6514547284459697e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6514547284459697e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17175144851207733, "signal/frontier_coverage_1/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_1/group_std_mean": 0.21923006176948548, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030743507202714683, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030743507202714683, "signal/frontier_coverage_10/centered_abs_mean": 0.1716869741678238, "signal/frontier_coverage_10/group_bin_occupancy": 0.88125, "signal/frontier_coverage_10/group_std_mean": 0.21915002167224884, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003073196718469262, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003073196718469262, "signal/frontier_coverage_15/centered_abs_mean": 0.17162050902843476, "signal/frontier_coverage_15/group_bin_occupancy": 0.88125, "signal/frontier_coverage_15/group_std_mean": 0.21906675398349762, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030720070470124485, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030720070470124485, "signal/frontier_coverage_20/centered_abs_mean": 0.16946081519126893, "signal/frontier_coverage_20/group_bin_occupancy": 0.88359375, "signal/frontier_coverage_20/group_std_mean": 0.21636516749858856, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030333484522998334, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030333484522998334, "signal/frontier_coverage_25/centered_abs_mean": 0.12273151576519012, "signal/frontier_coverage_25/group_bin_occupancy": 0.880859375, "signal/frontier_coverage_25/group_std_mean": 0.15797219574451446, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021968940272927284, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021968940272927284, "signal/frontier_coverage_5/centered_abs_mean": 0.17175144851207733, "signal/frontier_coverage_5/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_5/group_std_mean": 0.21923006176948548, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030743507202714683, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030743507202714683, "signal/frontier_ece_reward/centered_abs_mean": 0.009134939312934876, "signal/frontier_ece_reward/group_bin_occupancy": 0.883203125, "signal/frontier_ece_reward/group_std_mean": 0.012016034871339797, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011418674141168595, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011418674141168595, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2628565192222595, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.728515625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3387132942676544, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03285706490278244, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03285706490278244, "step": 165 }, { "calibration/aurc": 0.2330965079108403, "calibration/batch_distribution_entropy": 0.9783253458334003, "calibration/batch_entropy_100bins": 0.967895082463165, "calibration/batch_entropy_10bins": 0.9783253458334003, "calibration/batch_entropy_50bins": 0.9760524996239462, "calibration/batch_uniqueness": 0.9533475407324336, "calibration/buffer_distribution_entropy": 0.9984517660348228, "calibration/buffer_entropy_100bins": 0.9987360351808527, "calibration/buffer_entropy_10bins": 0.9984517660348228, "calibration/buffer_entropy_50bins": 0.9987687423048592, "calibration/confidence_entropy": 0.47645242697701995, "calibration/coverage@0%": 0.038713154354207434, "calibration/coverage@1%": 0.038713154354207434, "calibration/coverage@10%": 0.13606210249510764, "calibration/coverage@15%": 0.2814059442270059, "calibration/coverage@20%": 0.48266802226027394, "calibration/coverage@25%": 0.6222021771037183, "calibration/coverage@30%": 0.7140311582681018, "calibration/coverage@5%": 0.07820908757338552, "calibration/ece": 0.09310715311335953, "calibration/mean_confidence": 0.5309102969282397, "calibration/prompt_uniqueness": 0.8458068276047086, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 845.2, "completions/max_terminated_length": 624.6, "completions/mean_length": 188.4822265625, "completions/mean_terminated_length": 188.35069885253907, "completions/min_length": 99.4, "completions/min_terminated_length": 99.4, "epoch": 0.544, "grad_norm": 0.0012510113883763552, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 572608018.0, "reward": 0.8668020844459534, "reward_std": 0.0995595932006836, "rewards/accuracy_reward": 0.571484375, "rewards/brier_reward": 0.787188982963562, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002780396491289139, "rewards/frontier_coverage_1": 0.07695508673787117, "rewards/frontier_coverage_10": 0.07694511339068413, "rewards/frontier_coverage_15": 0.07687427774071694, "rewards/frontier_coverage_20": 0.07620680481195449, "rewards/frontier_coverage_25": 0.05764272883534431, "rewards/frontier_coverage_5": 0.07695508673787117, "rewards/frontier_ece_reward": 0.005536333145573735, "rewards/frontier_entropy_batch_reward": -0.2059100717306137, "signal/accuracy_reward/centered_abs_mean": 0.1152587890625, "signal/accuracy_reward/group_bin_occupancy": 0.178515625, "signal/accuracy_reward/group_std_mean": 0.15163064002990723, "signal/accuracy_reward/group_zero_std_frac": 0.571875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05762939453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05762939453125, "signal/advantage_abs_mean": 0.0772314801812172, "signal/advantage_pre_scale_abs_mean": 0.0772314801812172, "signal/advantage_pre_scale_std": 0.11531815230846405, "signal/advantage_std": 0.11531815230846405, "signal/brier_reward/centered_abs_mean": 0.13076411485671996, "signal/brier_reward/group_bin_occupancy": 0.857421875, "signal/brier_reward/group_std_mean": 0.1675712913274765, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016345514357089995, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.016345514357089995, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.002782534621655941, "signal/frontier_aurc_reward/group_bin_occupancy": 0.716796875, "signal/frontier_aurc_reward/group_std_mean": 0.004524756595492363, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9807369941845535e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9807369941845535e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.17586564421653747, "signal/frontier_coverage_1/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_1/group_std_mean": 0.22499242424964905, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031479948200285436, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031479948200285436, "signal/frontier_coverage_10/centered_abs_mean": 0.1757751613855362, "signal/frontier_coverage_10/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_10/group_std_mean": 0.2248790979385376, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031463753432035444, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031463753432035444, "signal/frontier_coverage_15/centered_abs_mean": 0.17549155354499818, "signal/frontier_coverage_15/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_15/group_std_mean": 0.22451838254928588, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031412987038493155, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031412987038493155, "signal/frontier_coverage_20/centered_abs_mean": 0.1719941407442093, "signal/frontier_coverage_20/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_20/group_std_mean": 0.2201235145330429, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003078695107251406, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003078695107251406, "signal/frontier_coverage_25/centered_abs_mean": 0.1134360283613205, "signal/frontier_coverage_25/group_bin_occupancy": 0.8578125, "signal/frontier_coverage_25/group_std_mean": 0.14679449796676636, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002030504820868373, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002030504820868373, "signal/frontier_coverage_5/centered_abs_mean": 0.17586564421653747, "signal/frontier_coverage_5/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_5/group_std_mean": 0.22499242424964905, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031479948200285436, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031479948200285436, "signal/frontier_ece_reward/centered_abs_mean": 0.009148731268942356, "signal/frontier_ece_reward/group_bin_occupancy": 0.894140625, "signal/frontier_ece_reward/group_std_mean": 0.011937451735138892, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011435914086177946, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011435914086177946, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2780795097351074, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744140625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.351616758108139, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03475993871688843, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03475993871688843, "step": 170 }, { "calibration/aurc": 0.2589022891995325, "calibration/batch_distribution_entropy": 0.9807805330573605, "calibration/batch_entropy_100bins": 0.9685901941968815, "calibration/batch_entropy_10bins": 0.9807805330573605, "calibration/batch_entropy_50bins": 0.977349875364441, "calibration/batch_uniqueness": 0.9529144287109375, "calibration/buffer_distribution_entropy": 0.9982041576732013, "calibration/buffer_entropy_100bins": 0.99861205511106, "calibration/buffer_entropy_10bins": 0.9982041576732013, "calibration/buffer_entropy_50bins": 0.9986009396983668, "calibration/confidence_entropy": 0.487472109955439, "calibration/coverage@0%": 0.082421875, "calibration/coverage@1%": 0.09140625, "calibration/coverage@10%": 0.228125, "calibration/coverage@15%": 0.305859375, "calibration/coverage@20%": 0.354296875, "calibration/coverage@25%": 0.4875, "calibration/coverage@30%": 0.617578125, "calibration/coverage@5%": 0.18359375, "calibration/ece": 0.11482706723425573, "calibration/mean_confidence": 0.5064478468870471, "calibration/prompt_uniqueness": 0.852490234375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 447.6, "completions/max_terminated_length": 447.6, "completions/mean_length": 188.24775390625, "completions/mean_terminated_length": 188.24775390625, "completions/min_length": 98.6, "completions/min_terminated_length": 98.6, "epoch": 0.56, "grad_norm": 0.0009781933622434735, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 589357083.0, "reward": 0.8498636603355407, "reward_std": 0.09095648676156998, "rewards/accuracy_reward": 0.526171875, "rewards/brier_reward": 0.8025665879249573, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0028386770747601984, "rewards/frontier_coverage_1": 0.11719954907894134, "rewards/frontier_coverage_10": 0.11714765727519989, "rewards/frontier_coverage_15": 0.11693512350320816, "rewards/frontier_coverage_20": 0.11193432807922363, "rewards/frontier_coverage_25": 0.07785176485776901, "rewards/frontier_coverage_5": 0.11719954907894134, "rewards/frontier_ece_reward": 0.005842031445354223, "rewards/frontier_entropy_batch_reward": -0.2080444574356079, "signal/accuracy_reward/centered_abs_mean": 0.091064453125, "signal/accuracy_reward/group_bin_occupancy": 0.17109375, "signal/accuracy_reward/group_std_mean": 0.12434282898902893, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455322265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0455322265625, "signal/advantage_abs_mean": 0.07068178355693817, "signal/advantage_pre_scale_abs_mean": 0.07068178355693817, "signal/advantage_pre_scale_std": 0.10720473378896714, "signal/advantage_std": 0.10720473378896714, "signal/brier_reward/centered_abs_mean": 0.12168123424053193, "signal/brier_reward/group_bin_occupancy": 0.83828125, "signal/brier_reward/group_std_mean": 0.1573301523923874, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01521015428006649, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01521015428006649, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028686066623777153, "signal/frontier_aurc_reward/group_bin_occupancy": 0.716015625, "signal/frontier_aurc_reward/group_std_mean": 0.004602818004786968, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.134805833222345e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.134805833222345e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1588267892599106, "signal/frontier_coverage_1/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_1/group_std_mean": 0.2065411925315857, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002842999389395118, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002842999389395118, "signal/frontier_coverage_10/centered_abs_mean": 0.15874530375003815, "signal/frontier_coverage_10/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_10/group_std_mean": 0.20643724501132965, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002841540891677141, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002841540891677141, "signal/frontier_coverage_15/centered_abs_mean": 0.15840073227882384, "signal/frontier_coverage_15/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_15/group_std_mean": 0.20599766075611115, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002835373068228364, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002835373068228364, "signal/frontier_coverage_20/centered_abs_mean": 0.14993982166051864, "signal/frontier_coverage_20/group_bin_occupancy": 0.846484375, "signal/frontier_coverage_20/group_std_mean": 0.19521004855632781, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026839226484298706, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026839226484298706, "signal/frontier_coverage_25/centered_abs_mean": 0.09466438889503478, "signal/frontier_coverage_25/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_25/group_std_mean": 0.12403950989246368, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001694492483511567, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001694492483511567, "signal/frontier_coverage_5/centered_abs_mean": 0.1588267892599106, "signal/frontier_coverage_5/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_5/group_std_mean": 0.2065411925315857, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002842999389395118, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002842999389395118, "signal/frontier_ece_reward/centered_abs_mean": 0.008115557208657264, "signal/frontier_ece_reward/group_bin_occupancy": 0.8796875, "signal/frontier_ece_reward/group_std_mean": 0.01066547017544508, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001014444651082158, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001014444651082158, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.274140340089798, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34681135416030884, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03426754251122475, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03426754251122475, "step": 175 }, { "calibration/aurc": 0.28829373194083824, "calibration/batch_distribution_entropy": 0.9841373980772424, "calibration/batch_entropy_100bins": 0.9681536817665121, "calibration/batch_entropy_10bins": 0.9841373980772424, "calibration/batch_entropy_50bins": 0.9792620166416952, "calibration/batch_uniqueness": 0.9536224365234375, "calibration/buffer_distribution_entropy": 0.998297768682672, "calibration/buffer_entropy_100bins": 0.9986629530424844, "calibration/buffer_entropy_10bins": 0.998297768682672, "calibration/buffer_entropy_50bins": 0.9986413654675284, "calibration/confidence_entropy": 0.48650558811910427, "calibration/coverage@0%": 0.019140625, "calibration/coverage@1%": 0.019140625, "calibration/coverage@10%": 0.109765625, "calibration/coverage@15%": 0.1859375, "calibration/coverage@20%": 0.351171875, "calibration/coverage@25%": 0.4765625, "calibration/coverage@30%": 0.573828125, "calibration/coverage@5%": 0.079296875, "calibration/ece": 0.10345090124198557, "calibration/mean_confidence": 0.4916340363422075, "calibration/prompt_uniqueness": 0.84775390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 865.6, "completions/max_terminated_length": 421.2, "completions/mean_length": 185.69755859375, "completions/mean_terminated_length": 185.43428955078124, "completions/min_length": 93.6, "completions/min_terminated_length": 93.6, "epoch": 0.576, "grad_norm": 0.0009465691982768476, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 606445250.0, "reward": 0.8475932121276856, "reward_std": 0.08987626880407333, "rewards/accuracy_reward": 0.5220703125, "rewards/brier_reward": 0.7915264964103699, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0030013061594218017, "rewards/frontier_coverage_1": 0.12116425856947899, "rewards/frontier_coverage_10": 0.12115439549088478, "rewards/frontier_coverage_15": 0.12109048813581466, "rewards/frontier_coverage_20": 0.11619948148727417, "rewards/frontier_coverage_25": 0.07896296977996826, "rewards/frontier_coverage_5": 0.12116425856947899, "rewards/frontier_ece_reward": 0.005212780460715294, "rewards/frontier_entropy_batch_reward": -0.20040208101272583, "signal/accuracy_reward/centered_abs_mean": 0.09073486328125, "signal/accuracy_reward/group_bin_occupancy": 0.172265625, "signal/accuracy_reward/group_std_mean": 0.12525396645069123, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045367431640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045367431640625, "signal/advantage_abs_mean": 0.06918673142790795, "signal/advantage_pre_scale_abs_mean": 0.06918673142790795, "signal/advantage_pre_scale_std": 0.1060228943824768, "signal/advantage_std": 0.1060228943824768, "signal/brier_reward/centered_abs_mean": 0.12269987463951111, "signal/brier_reward/group_bin_occupancy": 0.836328125, "signal/brier_reward/group_std_mean": 0.158928182721138, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015337484329938889, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.015337484329938889, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0027809354942291975, "signal/frontier_aurc_reward/group_bin_occupancy": 0.708203125, "signal/frontier_aurc_reward/group_std_mean": 0.004555220529437065, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9778743414208296e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9778743414208296e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16433233320713042, "signal/frontier_coverage_1/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_1/group_std_mean": 0.21237687766551971, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002941548731178045, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002941548731178045, "signal/frontier_coverage_10/centered_abs_mean": 0.16424158215522766, "signal/frontier_coverage_10/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_10/group_std_mean": 0.2122596561908722, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029399242252111436, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029399242252111436, "signal/frontier_coverage_15/centered_abs_mean": 0.16389403641223907, "signal/frontier_coverage_15/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_15/group_std_mean": 0.2118108332157135, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002933703176677227, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002933703176677227, "signal/frontier_coverage_20/centered_abs_mean": 0.15368525087833404, "signal/frontier_coverage_20/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_20/group_std_mean": 0.19865911304950715, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027509658131748436, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027509658131748436, "signal/frontier_coverage_25/centered_abs_mean": 0.09549619555473328, "signal/frontier_coverage_25/group_bin_occupancy": 0.87265625, "signal/frontier_coverage_25/group_std_mean": 0.12401848435401916, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017093818169087172, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017093818169087172, "signal/frontier_coverage_5/centered_abs_mean": 0.16433233320713042, "signal/frontier_coverage_5/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_5/group_std_mean": 0.21237687766551971, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002941548731178045, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002941548731178045, "signal/frontier_ece_reward/centered_abs_mean": 0.007696983031928539, "signal/frontier_ece_reward/group_bin_occupancy": 0.880078125, "signal/frontier_ece_reward/group_std_mean": 0.01010540798306465, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009621228789910674, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009621228789910674, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2615818977355957, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33764955401420593, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03269773721694946, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03269773721694946, "step": 180 }, { "calibration/aurc": 0.28347549862131505, "calibration/batch_distribution_entropy": 0.980011133919852, "calibration/batch_entropy_100bins": 0.9688122443099946, "calibration/batch_entropy_10bins": 0.980011133919852, "calibration/batch_entropy_50bins": 0.9770535189448722, "calibration/batch_uniqueness": 0.9536908440564685, "calibration/buffer_distribution_entropy": 0.9983131546794096, "calibration/buffer_entropy_100bins": 0.9986737401524538, "calibration/buffer_entropy_10bins": 0.9983131546794096, "calibration/buffer_entropy_50bins": 0.9986542446646809, "calibration/confidence_entropy": 0.47803387557048865, "calibration/coverage@0%": 0.0633347602739726, "calibration/coverage@1%": 0.0633347602739726, "calibration/coverage@10%": 0.2251460066046967, "calibration/coverage@15%": 0.3561093444227006, "calibration/coverage@20%": 0.48078828277886493, "calibration/coverage@25%": 0.5538772015655578, "calibration/coverage@30%": 0.628125, "calibration/coverage@5%": 0.11884326076320939, "calibration/ece": 0.12117534786829438, "calibration/mean_confidence": 0.4956464571020572, "calibration/prompt_uniqueness": 0.8459324800013007, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 899.6, "completions/max_terminated_length": 447.2, "completions/mean_length": 181.71953125, "completions/mean_terminated_length": 181.45542602539064, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.592, "grad_norm": 0.0011055340291932225, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 623473770.0, "reward": 0.8469202160835266, "reward_std": 0.08769658207893372, "rewards/accuracy_reward": 0.5228515625, "rewards/brier_reward": 0.7960270881652832, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.002862662449479103, "rewards/frontier_coverage_1": 0.1255490630865097, "rewards/frontier_coverage_10": 0.12550750821828843, "rewards/frontier_coverage_15": 0.12535116225481033, "rewards/frontier_coverage_20": 0.11695131063461303, "rewards/frontier_coverage_25": 0.07410136461257935, "rewards/frontier_coverage_5": 0.1255490630865097, "rewards/frontier_ece_reward": 0.005222787708044052, "rewards/frontier_entropy_batch_reward": -0.2153420329093933, "signal/accuracy_reward/centered_abs_mean": 0.09036865234375, "signal/accuracy_reward/group_bin_occupancy": 0.170703125, "signal/accuracy_reward/group_std_mean": 0.12226001918315887, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045184326171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045184326171875, "signal/advantage_abs_mean": 0.06837325692176818, "signal/advantage_pre_scale_abs_mean": 0.06837325692176818, "signal/advantage_pre_scale_std": 0.10427495390176773, "signal/advantage_std": 0.10427495390176773, "signal/brier_reward/centered_abs_mean": 0.11565729826688767, "signal/brier_reward/group_bin_occupancy": 0.840625, "signal/brier_reward/group_std_mean": 0.14894945323467254, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014457162283360959, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014457162283360959, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026661295210942625, "signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125, "signal/frontier_aurc_reward/group_std_mean": 0.004355709021911025, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7723716852488e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7723716852488e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16364216804504395, "signal/frontier_coverage_1/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_1/group_std_mean": 0.2090536832809448, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029291946906596423, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029291946906596423, "signal/frontier_coverage_10/centered_abs_mean": 0.163558030128479, "signal/frontier_coverage_10/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_10/group_std_mean": 0.20894888639450074, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002927688602358103, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002927688602358103, "signal/frontier_coverage_15/centered_abs_mean": 0.1632261723279953, "signal/frontier_coverage_15/group_bin_occupancy": 0.863671875, "signal/frontier_coverage_15/group_std_mean": 0.20853422582149506, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002921748394146562, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002921748394146562, "signal/frontier_coverage_20/centered_abs_mean": 0.15006764531135558, "signal/frontier_coverage_20/group_bin_occupancy": 0.860546875, "signal/frontier_coverage_20/group_std_mean": 0.19213563203811646, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026862107682973147, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026862107682973147, "signal/frontier_coverage_25/centered_abs_mean": 0.08844952881336213, "signal/frontier_coverage_25/group_bin_occupancy": 0.880078125, "signal/frontier_coverage_25/group_std_mean": 0.11421704292297363, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015832465374842285, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015832465374842285, "signal/frontier_coverage_5/centered_abs_mean": 0.16364216804504395, "signal/frontier_coverage_5/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_5/group_std_mean": 0.2090536832809448, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029291946906596423, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029291946906596423, "signal/frontier_ece_reward/centered_abs_mean": 0.007427510805428028, "signal/frontier_ece_reward/group_bin_occupancy": 0.890234375, "signal/frontier_ece_reward/group_std_mean": 0.009686007350683212, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009284388506785035, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009284388506785035, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2786764442920685, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726171875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3537044942378998, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03483455553650856, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03483455553650856, "step": 185 }, { "calibration/aurc": 0.23715687440798652, "calibration/batch_distribution_entropy": 0.9721351094227396, "calibration/batch_entropy_100bins": 0.9636682399300932, "calibration/batch_entropy_10bins": 0.9721351094227396, "calibration/batch_entropy_50bins": 0.9708812273088254, "calibration/batch_uniqueness": 0.9519918907307791, "calibration/buffer_distribution_entropy": 0.9983129150316932, "calibration/buffer_entropy_100bins": 0.9986866998459007, "calibration/buffer_entropy_10bins": 0.9983129150316932, "calibration/buffer_entropy_50bins": 0.9986616817533553, "calibration/confidence_entropy": 0.4812024814334549, "calibration/coverage@0%": 0.04922257216242661, "calibration/coverage@1%": 0.05976944716242662, "calibration/coverage@10%": 0.22308815435420745, "calibration/coverage@15%": 0.3262383806262231, "calibration/coverage@20%": 0.47001360689823873, "calibration/coverage@25%": 0.5852846746575342, "calibration/coverage@30%": 0.7067835738747554, "calibration/coverage@5%": 0.1296913221624266, "calibration/ece": 0.10479527222992176, "calibration/mean_confidence": 0.4769995498007412, "calibration/prompt_uniqueness": 0.84160999691077, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 663.6, "completions/max_terminated_length": 431.6, "completions/mean_length": 178.928125, "completions/mean_terminated_length": 178.79558715820312, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 0.608, "grad_norm": 0.0008596270345151424, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 640305482.0, "reward": 0.8485802054405213, "reward_std": 0.08481966853141784, "rewards/accuracy_reward": 0.520703125, "rewards/brier_reward": 0.8082961440086365, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002439832640811801, "rewards/frontier_coverage_1": 0.14013661593198776, "rewards/frontier_coverage_10": 0.14008433520793914, "rewards/frontier_coverage_15": 0.1397281616926193, "rewards/frontier_coverage_20": 0.12882789671421052, "rewards/frontier_coverage_25": 0.08048931509256363, "rewards/frontier_coverage_5": 0.14013533443212509, "rewards/frontier_ece_reward": 0.005031970608979463, "rewards/frontier_entropy_batch_reward": -0.216937056183815, "signal/accuracy_reward/centered_abs_mean": 0.0880615234375, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.1184210166335106, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04403076171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04403076171875, "signal/advantage_abs_mean": 0.06550839766860009, "signal/advantage_pre_scale_abs_mean": 0.06550839766860009, "signal/advantage_pre_scale_std": 0.09949304610490799, "signal/advantage_std": 0.09949304610490799, "signal/brier_reward/centered_abs_mean": 0.11575733423233033, "signal/brier_reward/group_bin_occupancy": 0.84140625, "signal/brier_reward/group_std_mean": 0.1480691760778427, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01446966677904129, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01446966677904129, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002259706752374768, "signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375, "signal/frontier_aurc_reward/group_std_mean": 0.0036888211499899624, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.04487487685401e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.04487487685401e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16878078281879424, "signal/frontier_coverage_1/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_1/group_std_mean": 0.21479279398918152, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030211757868528364, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030211757868528364, "signal/frontier_coverage_10/centered_abs_mean": 0.16870121657848358, "signal/frontier_coverage_10/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_10/group_std_mean": 0.21468909978866577, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030197515618056057, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030197515618056057, "signal/frontier_coverage_15/centered_abs_mean": 0.16824153959751129, "signal/frontier_coverage_15/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_15/group_std_mean": 0.2140854448080063, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030115234199911355, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030115234199911355, "signal/frontier_coverage_20/centered_abs_mean": 0.1515140563249588, "signal/frontier_coverage_20/group_bin_occupancy": 0.8625, "signal/frontier_coverage_20/group_std_mean": 0.19256215989589692, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027121015824377536, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027121015824377536, "signal/frontier_coverage_25/centered_abs_mean": 0.08549174815416336, "signal/frontier_coverage_25/group_bin_occupancy": 0.89453125, "signal/frontier_coverage_25/group_std_mean": 0.10916633754968644, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015303022461012005, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015303022461012005, "signal/frontier_coverage_5/centered_abs_mean": 0.16878008842468262, "signal/frontier_coverage_5/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_5/group_std_mean": 0.21479184925556183, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030211633536964657, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030211633536964657, "signal/frontier_ece_reward/centered_abs_mean": 0.006747147906571627, "signal/frontier_ece_reward/group_bin_occupancy": 0.8875, "signal/frontier_ece_reward/group_std_mean": 0.00879486370831728, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008433934883214534, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008433934883214534, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2829363703727722, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3577865481376648, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035367046296596524, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035367046296596524, "step": 190 }, { "calibration/aurc": 0.24532003825815796, "calibration/batch_distribution_entropy": 0.981679767990388, "calibration/batch_entropy_100bins": 0.9709472161360031, "calibration/batch_entropy_10bins": 0.981679767990388, "calibration/batch_entropy_50bins": 0.9798056822605836, "calibration/batch_uniqueness": 0.95435791015625, "calibration/buffer_distribution_entropy": 0.9985313752795584, "calibration/buffer_entropy_100bins": 0.9988054011236678, "calibration/buffer_entropy_10bins": 0.9985313752795584, "calibration/buffer_entropy_50bins": 0.9987929314111103, "calibration/confidence_entropy": 0.5201001362906903, "calibration/coverage@0%": 0.03359375, "calibration/coverage@1%": 0.03359375, "calibration/coverage@10%": 0.201953125, "calibration/coverage@15%": 0.341796875, "calibration/coverage@20%": 0.436328125, "calibration/coverage@25%": 0.52265625, "calibration/coverage@30%": 0.613671875, "calibration/coverage@5%": 0.112109375, "calibration/ece": 0.10123137926063848, "calibration/mean_confidence": 0.49137512856978677, "calibration/prompt_uniqueness": 0.84775390625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 422.8, "completions/max_terminated_length": 422.8, "completions/mean_length": 180.31982421875, "completions/mean_terminated_length": 180.31982421875, "completions/min_length": 93.8, "completions/min_terminated_length": 93.8, "epoch": 0.624, "grad_norm": 0.001026191283017397, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 657495861.0, "reward": 0.8518246412277222, "reward_std": 0.08756706416606903, "rewards/accuracy_reward": 0.52275390625, "rewards/brier_reward": 0.8068322658538818, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002647676505148411, "rewards/frontier_coverage_1": 0.12973806113004685, "rewards/frontier_coverage_10": 0.12968083024024962, "rewards/frontier_coverage_15": 0.12917735427618027, "rewards/frontier_coverage_20": 0.11514810025691986, "rewards/frontier_coverage_25": 0.06962493434548378, "rewards/frontier_coverage_5": 0.12973549515008925, "rewards/frontier_ece_reward": 0.004124377947300672, "rewards/frontier_entropy_batch_reward": -0.18768059611320495, "signal/accuracy_reward/centered_abs_mean": 0.093621826171875, "signal/accuracy_reward/group_bin_occupancy": 0.16796875, "signal/accuracy_reward/group_std_mean": 0.12246521413326264, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0468109130859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0468109130859375, "signal/advantage_abs_mean": 0.06893313750624656, "signal/advantage_pre_scale_abs_mean": 0.06893313750624656, "signal/advantage_pre_scale_std": 0.10512781888246536, "signal/advantage_std": 0.10512781888246536, "signal/brier_reward/centered_abs_mean": 0.10718954056501388, "signal/brier_reward/group_bin_occupancy": 0.858984375, "signal/brier_reward/group_std_mean": 0.1384373813867569, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013398692570626735, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013398692570626735, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0022487165872007607, "signal/frontier_aurc_reward/group_bin_occupancy": 0.728515625, "signal/frontier_aurc_reward/group_std_mean": 0.0035695353988558056, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0252025792142375e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0252025792142375e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15328652858734132, "signal/frontier_coverage_1/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_1/group_std_mean": 0.19674740433692933, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002743828808888793, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002743828808888793, "signal/frontier_coverage_10/centered_abs_mean": 0.1532078802585602, "signal/frontier_coverage_10/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_10/group_std_mean": 0.19664531350135803, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002742420881986618, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002742420881986618, "signal/frontier_coverage_15/centered_abs_mean": 0.1525299906730652, "signal/frontier_coverage_15/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_15/group_std_mean": 0.1957621306180954, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027302867732942105, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027302867732942105, "signal/frontier_coverage_20/centered_abs_mean": 0.129715932905674, "signal/frontier_coverage_20/group_bin_occupancy": 0.88046875, "signal/frontier_coverage_20/group_std_mean": 0.16696780920028687, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023219150956720115, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023219150956720115, "signal/frontier_coverage_25/centered_abs_mean": 0.06962908133864402, "signal/frontier_coverage_25/group_bin_occupancy": 0.90078125, "signal/frontier_coverage_25/group_std_mean": 0.09048426896333694, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001246360526420176, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001246360526420176, "signal/frontier_coverage_5/centered_abs_mean": 0.15328298211097718, "signal/frontier_coverage_5/group_bin_occupancy": 0.877734375, "signal/frontier_coverage_5/group_std_mean": 0.19674279391765595, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002743765339255333, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002743765339255333, "signal/frontier_ece_reward/centered_abs_mean": 0.00600477633997798, "signal/frontier_ece_reward/group_bin_occupancy": 0.878515625, "signal/frontier_ece_reward/group_std_mean": 0.007920240703970193, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007505970424972475, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007505970424972475, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2654553234577179, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3388149976730347, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033181915432214736, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033181915432214736, "step": 195 }, { "calibration/aurc": 0.24430388293357136, "calibration/batch_distribution_entropy": 0.9791797972652356, "calibration/batch_entropy_100bins": 0.9679413992157719, "calibration/batch_entropy_10bins": 0.9791797972652356, "calibration/batch_entropy_50bins": 0.975538565518886, "calibration/batch_uniqueness": 0.9525665283203125, "calibration/buffer_distribution_entropy": 0.9987113525336898, "calibration/buffer_entropy_100bins": 0.998895678573958, "calibration/buffer_entropy_10bins": 0.9987113525336898, "calibration/buffer_entropy_50bins": 0.9989227369806599, "calibration/confidence_entropy": 0.5174565045374463, "calibration/coverage@0%": 0.07578125, "calibration/coverage@1%": 0.111328125, "calibration/coverage@10%": 0.309375, "calibration/coverage@15%": 0.365234375, "calibration/coverage@20%": 0.4078125, "calibration/coverage@25%": 0.548828125, "calibration/coverage@30%": 0.664453125, "calibration/coverage@5%": 0.22578125, "calibration/ece": 0.16490499070074144, "calibration/mean_confidence": 0.5123002672000428, "calibration/prompt_uniqueness": 0.852099609375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 622.0, "completions/max_terminated_length": 479.4, "completions/mean_length": 184.83974609375, "completions/mean_terminated_length": 183.78404846191407, "completions/min_length": 97.4, "completions/min_terminated_length": 97.4, "epoch": 0.64, "grad_norm": 0.0008454410126432776, "learning_rate": 1e-06, "loss": 0.0014, "num_tokens": 674731308.0, "reward": 0.8708350419998169, "reward_std": 0.08193039745092393, "rewards/accuracy_reward": 0.56953125, "rewards/brier_reward": 0.8082751274108887, "rewards/format_reward": 0.99921875, "rewards/frontier_aurc_reward": -0.002218431932851672, "rewards/frontier_coverage_1": 0.09933431595563888, "rewards/frontier_coverage_10": 0.09924045875668526, "rewards/frontier_coverage_15": 0.09875798150897026, "rewards/frontier_coverage_20": 0.08629776164889336, "rewards/frontier_coverage_25": 0.056573347002267835, "rewards/frontier_coverage_5": 0.09933282062411308, "rewards/frontier_ece_reward": 0.003817522618919611, "rewards/frontier_entropy_batch_reward": -0.19735628366470337, "signal/accuracy_reward/centered_abs_mean": 0.07532958984375, "signal/accuracy_reward/group_bin_occupancy": 0.1640625, "signal/accuracy_reward/group_std_mean": 0.10438980013132096, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037664794921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.037664794921875, "signal/advantage_abs_mean": 0.06335262283682823, "signal/advantage_pre_scale_abs_mean": 0.06335262283682823, "signal/advantage_pre_scale_std": 0.09740418940782547, "signal/advantage_std": 0.09740418940782547, "signal/brier_reward/centered_abs_mean": 0.10196209698915482, "signal/brier_reward/group_bin_occupancy": 0.84765625, "signal/brier_reward/group_std_mean": 0.13130579739809037, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012745262123644352, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012745262123644352, "signal/format_reward/centered_abs_mean": 0.001171875, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.001374816708266735, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005859375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020356971537694333, "signal/frontier_aurc_reward/group_bin_occupancy": 0.718359375, "signal/frontier_aurc_reward/group_std_mean": 0.003529385570436716, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.643897762231063e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.643897762231063e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.139412322640419, "signal/frontier_coverage_1/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_1/group_std_mean": 0.18100157380104065, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002495480561628938, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002495480561628938, "signal/frontier_coverage_10/centered_abs_mean": 0.13926379680633544, "signal/frontier_coverage_10/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_10/group_std_mean": 0.18081148266792296, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002492821915075183, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002492821915075183, "signal/frontier_coverage_15/centered_abs_mean": 0.13798445761203765, "signal/frontier_coverage_15/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_15/group_std_mean": 0.17917191088199616, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024699217174202204, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024699217174202204, "signal/frontier_coverage_20/centered_abs_mean": 0.11161820888519287, "signal/frontier_coverage_20/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_20/group_std_mean": 0.1451725423336029, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019979658536612988, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019979658536612988, "signal/frontier_coverage_25/centered_abs_mean": 0.061739873886108396, "signal/frontier_coverage_25/group_bin_occupancy": 0.89921875, "signal/frontier_coverage_25/group_std_mean": 0.08028749227523804, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011051436886191368, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011051436886191368, "signal/frontier_coverage_5/centered_abs_mean": 0.13940848410129547, "signal/frontier_coverage_5/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_5/group_std_mean": 0.1809966504573822, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002495411830022931, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002495411830022931, "signal/frontier_ece_reward/centered_abs_mean": 0.005760820955038071, "signal/frontier_ece_reward/group_bin_occupancy": 0.866796875, "signal/frontier_ece_reward/group_std_mean": 0.00775869581848383, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007201026193797589, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007201026193797589, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2656015157699585, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34027169942855834, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03320018947124481, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03320018947124481, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.41854034890006203, "eval_calibration/batch_distribution_entropy": 0.9258790022452184, "eval_calibration/batch_entropy_100bins": 0.7155226945000874, "eval_calibration/batch_entropy_10bins": 0.9258790022452184, "eval_calibration/batch_entropy_50bins": 0.7976394251687033, "eval_calibration/batch_uniqueness": 0.8994140625, "eval_calibration/buffer_distribution_entropy": 0.9987820695130332, "eval_calibration/buffer_entropy_100bins": 0.9989213084455153, "eval_calibration/buffer_entropy_10bins": 0.9987820695130332, "eval_calibration/buffer_entropy_50bins": 0.9989669617425297, "eval_calibration/confidence_entropy": 0.49184587531394314, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.1328125, "eval_calibration/coverage@15%": 0.171875, "eval_calibration/coverage@20%": 0.1953125, "eval_calibration/coverage@25%": 0.2265625, "eval_calibration/coverage@30%": 0.2578125, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.17095216040466216, "eval_calibration/mean_confidence": 0.45492840413455565, "eval_calibration/prompt_uniqueness": 0.8994140625, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 295.25, "eval_completions/max_terminated_length": 295.25, "eval_completions/mean_length": 179.74158096313477, "eval_completions/mean_terminated_length": 179.74158096313477, "eval_completions/min_length": 109.75, "eval_completions/min_terminated_length": 109.75, "eval_loss": 0.0, "eval_num_tokens": 674731308.0, "eval_reward": 0.7049643099308014, "eval_reward_std": 0.22184203192591667, "eval_rewards/accuracy_reward": 0.423828125, "eval_rewards/brier_reward": 0.7956392019987106, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0033663903595879674, "eval_rewards/frontier_coverage_1": 0.19531626999378204, "eval_rewards/frontier_coverage_10": 0.1952117159962654, "eval_rewards/frontier_coverage_15": 0.19375700131058693, "eval_rewards/frontier_coverage_20": 0.15371991135179996, "eval_rewards/frontier_coverage_25": 0.08286740258336067, "eval_rewards/frontier_coverage_5": 0.19531207531690598, "eval_rewards/frontier_ece_reward": 0.003727212024386972, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 17.1514, "eval_samples_per_second": 29.152, "eval_signal/accuracy_reward/centered_abs_mean": 0.4744873046875, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49458901584148407, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23724365234375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23724365234375, "eval_signal/advantage_abs_mean": 0.2057364284992218, "eval_signal/advantage_pre_scale_abs_mean": 0.2057364284992218, "eval_signal/advantage_pre_scale_std": 0.21934344619512558, "eval_signal/advantage_std": 0.21934344619512558, "eval_signal/brier_reward/centered_abs_mean": 0.1897713765501976, "eval_signal/brier_reward/group_bin_occupancy": 0.8984375, "eval_signal/brier_reward/group_std_mean": 0.24221712350845337, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0237214220687747, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.0237214220687747, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004152168636210263, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6640625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.007893728208728135, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.432381426042411e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.432381426042411e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3731478080153465, "eval_signal/frontier_coverage_1/group_bin_occupancy": 1.0, "eval_signal/frontier_coverage_1/group_std_mean": 0.45107389986515045, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006679345387965441, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006679345387965441, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.37288998067379, "eval_signal/frontier_coverage_10/group_bin_occupancy": 1.0, "eval_signal/frontier_coverage_10/group_std_mean": 0.45077458769083023, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006674730451777577, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006674730451777577, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.36930492520332336, "eval_signal/frontier_coverage_15/group_bin_occupancy": 1.0, "eval_signal/frontier_coverage_15/group_std_mean": 0.4466145858168602, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0066105579026043415, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0066105579026043415, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.29113033413887024, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_20/group_std_mean": 0.354678250849247, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005211232579313219, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005211232579313219, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.13845044746994972, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375, "eval_signal/frontier_coverage_25/group_std_mean": 0.17535366117954254, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002478263049852103, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002478263049852103, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3731374442577362, "eval_signal/frontier_coverage_5/group_bin_occupancy": 1.0, "eval_signal/frontier_coverage_5/group_std_mean": 0.4510618671774864, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006679159821942449, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006679159821942449, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.007429954246617854, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8671875, "eval_signal/frontier_ece_reward/group_std_mean": 0.010206094710156322, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009287442808272317, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009287442808272317, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.233, "step": 200 }, { "calibration/aurc": 0.4072680963308123, "calibration/batch_distribution_entropy": 0.9719537361453303, "calibration/batch_entropy_100bins": 0.9645036495344895, "calibration/batch_entropy_10bins": 0.9719537361453303, "calibration/batch_entropy_50bins": 0.972271453991881, "calibration/batch_uniqueness": 0.9524566650390625, "calibration/buffer_distribution_entropy": 0.9988068526161396, "calibration/buffer_entropy_100bins": 0.9989106275315882, "calibration/buffer_entropy_10bins": 0.9988068526161396, "calibration/buffer_entropy_50bins": 0.9989733950683586, "calibration/confidence_entropy": 0.5230128680468962, "calibration/coverage@0%": 0.003515625, "calibration/coverage@1%": 0.003515625, "calibration/coverage@10%": 0.009765625, "calibration/coverage@15%": 0.019140625, "calibration/coverage@20%": 0.0875, "calibration/coverage@25%": 0.137890625, "calibration/coverage@30%": 0.26171875, "calibration/coverage@5%": 0.003515625, "calibration/ece": 0.1075798181494427, "calibration/mean_confidence": 0.4706070682994466, "calibration/prompt_uniqueness": 0.853369140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 614.6, "completions/max_terminated_length": 390.6, "completions/mean_length": 182.33017578125, "completions/mean_terminated_length": 182.19835510253907, "completions/min_length": 91.2, "completions/min_terminated_length": 91.2, "epoch": 0.656, "grad_norm": 0.001009272993542254, "learning_rate": 1e-06, "loss": 0.0002, "num_tokens": 691454913.0, "reward": 0.8378146886825562, "reward_std": 0.09044925570487976, "rewards/accuracy_reward": 0.50419921875, "rewards/brier_reward": 0.7853815197944641, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.003076691273599863, "rewards/frontier_coverage_1": 0.1220921441912651, "rewards/frontier_coverage_10": 0.1220327764749527, "rewards/frontier_coverage_15": 0.12089861333370208, "rewards/frontier_coverage_20": 0.09999236166477203, "rewards/frontier_coverage_25": 0.05890063121914864, "rewards/frontier_coverage_5": 0.12208605259656906, "rewards/frontier_ece_reward": 0.003139182738959789, "rewards/frontier_entropy_batch_reward": -0.19447652399539947, "signal/accuracy_reward/centered_abs_mean": 0.098480224609375, "signal/accuracy_reward/group_bin_occupancy": 0.171875, "signal/accuracy_reward/group_std_mean": 0.12976800352334977, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0492401123046875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0492401123046875, "signal/advantage_abs_mean": 0.07178077697753907, "signal/advantage_pre_scale_abs_mean": 0.07178077697753907, "signal/advantage_pre_scale_std": 0.10841633677482605, "signal/advantage_std": 0.10841633677482605, "signal/brier_reward/centered_abs_mean": 0.11750788986682892, "signal/brier_reward/group_bin_occupancy": 0.852734375, "signal/brier_reward/group_std_mean": 0.15079601109027863, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014688486233353615, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014688486233353615, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0026406456716358663, "signal/frontier_aurc_reward/group_bin_occupancy": 0.719921875, "signal/frontier_aurc_reward/group_std_mean": 0.004412023955956102, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7267557238228616e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7267557238228616e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16164307296276093, "signal/frontier_coverage_1/group_bin_occupancy": 0.86875, "signal/frontier_coverage_1/group_std_mean": 0.2075218141078949, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028934108559042215, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028934108559042215, "signal/frontier_coverage_10/centered_abs_mean": 0.16153694093227386, "signal/frontier_coverage_10/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_10/group_std_mean": 0.207388174533844, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002891511144116521, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002891511144116521, "signal/frontier_coverage_15/centered_abs_mean": 0.15967210829257966, "signal/frontier_coverage_15/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_15/group_std_mean": 0.20502502024173735, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028581305872648955, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028581305872648955, "signal/frontier_coverage_20/centered_abs_mean": 0.12663253098726274, "signal/frontier_coverage_20/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_20/group_std_mean": 0.16340535879135132, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002266722172498703, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002266722172498703, "signal/frontier_coverage_25/centered_abs_mean": 0.0673256479203701, "signal/frontier_coverage_25/group_bin_occupancy": 0.896875, "signal/frontier_coverage_25/group_std_mean": 0.08757460862398148, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012051290133967996, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012051290133967996, "signal/frontier_coverage_5/centered_abs_mean": 0.1616332322359085, "signal/frontier_coverage_5/group_bin_occupancy": 0.86875, "signal/frontier_coverage_5/group_std_mean": 0.20750951170921325, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028932347893714907, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028932347893714907, "signal/frontier_ece_reward/centered_abs_mean": 0.0055423608049750325, "signal/frontier_ece_reward/group_bin_occupancy": 0.8828125, "signal/frontier_ece_reward/group_std_mean": 0.007337391003966331, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006927951006218791, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006927951006218791, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.266719377040863, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3390504062175751, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03333992213010788, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333992213010788, "step": 205 }, { "calibration/aurc": 0.30184251919884036, "calibration/batch_distribution_entropy": 0.9766400147465495, "calibration/batch_entropy_100bins": 0.9661916960682403, "calibration/batch_entropy_10bins": 0.9766400147465495, "calibration/batch_entropy_50bins": 0.975390516981222, "calibration/batch_uniqueness": 0.953192138671875, "calibration/buffer_distribution_entropy": 0.9989337853009663, "calibration/buffer_entropy_100bins": 0.9989487435236883, "calibration/buffer_entropy_10bins": 0.9989337853009663, "calibration/buffer_entropy_50bins": 0.9990317545857466, "calibration/confidence_entropy": 0.4864500476998418, "calibration/coverage@0%": 0.019140625, "calibration/coverage@1%": 0.019140625, "calibration/coverage@10%": 0.166796875, "calibration/coverage@15%": 0.203515625, "calibration/coverage@20%": 0.25, "calibration/coverage@25%": 0.29921875, "calibration/coverage@30%": 0.41796875, "calibration/coverage@5%": 0.112890625, "calibration/ece": 0.14628865248087672, "calibration/mean_confidence": 0.4938824967603755, "calibration/prompt_uniqueness": 0.848876953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 387.6, "completions/max_terminated_length": 387.6, "completions/mean_length": 181.459765625, "completions/mean_terminated_length": 181.459765625, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.672, "grad_norm": 0.0008657427970319986, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 708226501.0, "reward": 0.8453529119491577, "reward_std": 0.08392495959997177, "rewards/accuracy_reward": 0.51845703125, "rewards/brier_reward": 0.7956361413002014, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002716613910160959, "rewards/frontier_coverage_1": 0.1360724911093712, "rewards/frontier_coverage_10": 0.13601431995630264, "rewards/frontier_coverage_15": 0.13472481966018676, "rewards/frontier_coverage_20": 0.11215179413557053, "rewards/frontier_coverage_25": 0.0678616002202034, "rewards/frontier_coverage_5": 0.13606539219617844, "rewards/frontier_ece_reward": 0.0036563334055244924, "rewards/frontier_entropy_batch_reward": -0.2134263336658478, "signal/accuracy_reward/centered_abs_mean": 0.093804931640625, "signal/accuracy_reward/group_bin_occupancy": 0.17109375, "signal/accuracy_reward/group_std_mean": 0.12641526907682418, "signal/accuracy_reward/group_zero_std_frac": 0.63125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0469024658203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0469024658203125, "signal/advantage_abs_mean": 0.06465236023068428, "signal/advantage_pre_scale_abs_mean": 0.06465236023068428, "signal/advantage_pre_scale_std": 0.0998497799038887, "signal/advantage_std": 0.0998497799038887, "signal/brier_reward/centered_abs_mean": 0.11606302261352539, "signal/brier_reward/group_bin_occupancy": 0.844140625, "signal/brier_reward/group_std_mean": 0.14761213660240174, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014507877826690673, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014507877826690673, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023735316237434743, "signal/frontier_aurc_reward/group_bin_occupancy": 0.742578125, "signal/frontier_aurc_reward/group_std_mean": 0.003954212227836251, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2486215534154326e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2486215534154326e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1699829190969467, "signal/frontier_coverage_1/group_bin_occupancy": 0.851171875, "signal/frontier_coverage_1/group_std_mean": 0.21657621562480928, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003042694181203842, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003042694181203842, "signal/frontier_coverage_10/centered_abs_mean": 0.1698471039533615, "signal/frontier_coverage_10/group_bin_occupancy": 0.851171875, "signal/frontier_coverage_10/group_std_mean": 0.21641322374343872, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003040262870490551, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003040262870490551, "signal/frontier_coverage_15/centered_abs_mean": 0.16724947690963746, "signal/frontier_coverage_15/group_bin_occupancy": 0.85, "signal/frontier_coverage_15/group_std_mean": 0.213279390335083, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029937655199319124, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029937655199319124, "signal/frontier_coverage_20/centered_abs_mean": 0.12853406816720964, "signal/frontier_coverage_20/group_bin_occupancy": 0.8453125, "signal/frontier_coverage_20/group_std_mean": 0.1650033712387085, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002300759730860591, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002300759730860591, "signal/frontier_coverage_25/centered_abs_mean": 0.07163915932178497, "signal/frontier_coverage_25/group_bin_occupancy": 0.901953125, "signal/frontier_coverage_25/group_std_mean": 0.09214308261871337, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001282340893521905, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001282340893521905, "signal/frontier_coverage_5/centered_abs_mean": 0.16996634304523467, "signal/frontier_coverage_5/group_bin_occupancy": 0.851171875, "signal/frontier_coverage_5/group_std_mean": 0.2165563225746155, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003042397554963827, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003042397554963827, "signal/frontier_ece_reward/centered_abs_mean": 0.0058284570463001725, "signal/frontier_ece_reward/group_bin_occupancy": 0.867578125, "signal/frontier_ece_reward/group_std_mean": 0.007644351571798325, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007285571307875216, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007285571307875216, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2624157965183258, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730078125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3384029269218445, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032801974564790726, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032801974564790726, "step": 210 }, { "calibration/aurc": 0.32277610924710637, "calibration/batch_distribution_entropy": 0.9701296713733163, "calibration/batch_entropy_100bins": 0.9644057869188065, "calibration/batch_entropy_10bins": 0.9701296713733163, "calibration/batch_entropy_50bins": 0.9714994429885904, "calibration/batch_uniqueness": 0.951641845703125, "calibration/buffer_distribution_entropy": 0.9989417647738268, "calibration/buffer_entropy_100bins": 0.9989298633022411, "calibration/buffer_entropy_10bins": 0.9989417647738268, "calibration/buffer_entropy_50bins": 0.9990118814717297, "calibration/confidence_entropy": 0.513002509873567, "calibration/coverage@0%": 0.01171875, "calibration/coverage@1%": 0.01171875, "calibration/coverage@10%": 0.151953125, "calibration/coverage@15%": 0.240234375, "calibration/coverage@20%": 0.323046875, "calibration/coverage@25%": 0.4640625, "calibration/coverage@30%": 0.544921875, "calibration/coverage@5%": 0.037890625, "calibration/ece": 0.12407527470589289, "calibration/mean_confidence": 0.4836047739947819, "calibration/prompt_uniqueness": 0.8462890625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 860.4, "completions/max_terminated_length": 406.2, "completions/mean_length": 185.49267578125, "completions/mean_terminated_length": 185.22888793945313, "completions/min_length": 92.2, "completions/min_terminated_length": 92.2, "epoch": 0.688, "grad_norm": 0.0011569778434932232, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 725079866.0, "reward": 0.8533730626106262, "reward_std": 0.09096147418022156, "rewards/accuracy_reward": 0.5375, "rewards/brier_reward": 0.7997807264328003, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0025234234519302843, "rewards/frontier_coverage_1": 0.1166018046438694, "rewards/frontier_coverage_10": 0.11651684194803238, "rewards/frontier_coverage_15": 0.11543264091014863, "rewards/frontier_coverage_20": 0.08750456124544144, "rewards/frontier_coverage_25": 0.05482863634824753, "rewards/frontier_coverage_5": 0.11660146117210388, "rewards/frontier_ece_reward": 0.002992427349090576, "rewards/frontier_entropy_batch_reward": -0.21163803935050965, "signal/accuracy_reward/centered_abs_mean": 0.0998291015625, "signal/accuracy_reward/group_bin_occupancy": 0.1765625, "signal/accuracy_reward/group_std_mean": 0.13668281584978104, "signal/accuracy_reward/group_zero_std_frac": 0.5875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04991455078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04991455078125, "signal/advantage_abs_mean": 0.06996657401323318, "signal/advantage_pre_scale_abs_mean": 0.06996657401323318, "signal/advantage_pre_scale_std": 0.10759487152099609, "signal/advantage_std": 0.10759487152099609, "signal/brier_reward/centered_abs_mean": 0.10977463126182556, "signal/brier_reward/group_bin_occupancy": 0.861328125, "signal/brier_reward/group_std_mean": 0.14089445173740386, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013721828907728195, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013721828907728195, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020935308886691926, "signal/frontier_aurc_reward/group_bin_occupancy": 0.746484375, "signal/frontier_aurc_reward/group_std_mean": 0.0034752024803310633, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.747420341824181e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.747420341824181e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.16087363958358764, "signal/frontier_coverage_1/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_1/group_std_mean": 0.20536437928676604, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002879638038575649, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002879638038575649, "signal/frontier_coverage_10/centered_abs_mean": 0.16079167425632476, "signal/frontier_coverage_10/group_bin_occupancy": 0.866015625, "signal/frontier_coverage_10/group_std_mean": 0.20525703132152556, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002878170693293214, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002878170693293214, "signal/frontier_coverage_15/centered_abs_mean": 0.15650815665721893, "signal/frontier_coverage_15/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_15/group_std_mean": 0.19971639513969422, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028014959301799537, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028014959301799537, "signal/frontier_coverage_20/centered_abs_mean": 0.1123495414853096, "signal/frontier_coverage_20/group_bin_occupancy": 0.8609375, "signal/frontier_coverage_20/group_std_mean": 0.14411205649375916, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020110567333176733, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020110567333176733, "signal/frontier_coverage_25/centered_abs_mean": 0.06338529288768768, "signal/frontier_coverage_25/group_bin_occupancy": 0.9078125, "signal/frontier_coverage_25/group_std_mean": 0.0813615933060646, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011345966951921583, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011345966951921583, "signal/frontier_coverage_5/centered_abs_mean": 0.16086728274822235, "signal/frontier_coverage_5/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_5/group_std_mean": 0.20535596311092377, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002879524324089289, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002879524324089289, "signal/frontier_ece_reward/centered_abs_mean": 0.0054166271351277825, "signal/frontier_ece_reward/group_bin_occupancy": 0.882421875, "signal/frontier_ece_reward/group_std_mean": 0.00712386667728424, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006770783918909728, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006770783918909728, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2808054625988007, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35399608612060546, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035100682824850085, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035100682824850085, "step": 215 }, { "calibration/aurc": 0.2567679202323507, "calibration/batch_distribution_entropy": 0.9753718666465045, "calibration/batch_entropy_100bins": 0.9653751735288918, "calibration/batch_entropy_10bins": 0.9753718666465045, "calibration/batch_entropy_50bins": 0.9753353539912812, "calibration/batch_uniqueness": 0.9522705078125, "calibration/buffer_distribution_entropy": 0.9989540322458701, "calibration/buffer_entropy_100bins": 0.9989665872140143, "calibration/buffer_entropy_10bins": 0.9989540322458701, "calibration/buffer_entropy_50bins": 0.9990419447527051, "calibration/confidence_entropy": 0.4933685141452767, "calibration/coverage@0%": 0.008203125, "calibration/coverage@1%": 0.008203125, "calibration/coverage@10%": 0.11015625, "calibration/coverage@15%": 0.191015625, "calibration/coverage@20%": 0.31171875, "calibration/coverage@25%": 0.5234375, "calibration/coverage@30%": 0.6765625, "calibration/coverage@5%": 0.024609375, "calibration/ece": 0.10514110855139244, "calibration/mean_confidence": 0.5225538825879033, "calibration/prompt_uniqueness": 0.837548828125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 687.4, "completions/max_terminated_length": 469.4, "completions/mean_length": 189.19560546875, "completions/mean_terminated_length": 189.06415710449218, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.704, "grad_norm": 0.00079206726513803, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 741883373.0, "reward": 0.8564110398292542, "reward_std": 0.08603468835353852, "rewards/accuracy_reward": 0.5404296875, "rewards/brier_reward": 0.8099352717399597, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002810010826215148, "rewards/frontier_coverage_1": 0.11663352549076081, "rewards/frontier_coverage_10": 0.11659030914306641, "rewards/frontier_coverage_15": 0.1124075010418892, "rewards/frontier_coverage_20": 0.08771874606609345, "rewards/frontier_coverage_25": 0.0565977543592453, "rewards/frontier_coverage_5": 0.11662895604968071, "rewards/frontier_ece_reward": 0.0033508235588669776, "rewards/frontier_entropy_batch_reward": -0.20978534519672393, "signal/accuracy_reward/centered_abs_mean": 0.082080078125, "signal/accuracy_reward/group_bin_occupancy": 0.16484375, "signal/accuracy_reward/group_std_mean": 0.10941672474145889, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0410400390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0410400390625, "signal/advantage_abs_mean": 0.06718932390213013, "signal/advantage_pre_scale_abs_mean": 0.06718932390213013, "signal/advantage_pre_scale_std": 0.10292920172214508, "signal/advantage_std": 0.10292920172214508, "signal/brier_reward/centered_abs_mean": 0.10521638691425324, "signal/brier_reward/group_bin_occupancy": 0.8578125, "signal/brier_reward/group_std_mean": 0.13554594218730925, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013152048364281655, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013152048364281655, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024594481103122233, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74765625, "signal/frontier_aurc_reward/group_std_mean": 0.004012216068804264, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.402412014314905e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.402412014314905e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14190192222595216, "signal/frontier_coverage_1/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_1/group_std_mean": 0.18161689043045043, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025400443468242885, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025400443468242885, "signal/frontier_coverage_10/centered_abs_mean": 0.14177892506122589, "signal/frontier_coverage_10/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_10/group_std_mean": 0.18146247267723084, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002537842746824026, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002537842746824026, "signal/frontier_coverage_15/centered_abs_mean": 0.1324725031852722, "signal/frontier_coverage_15/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_15/group_std_mean": 0.16975606381893157, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023712576366961002, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023712576366961002, "signal/frontier_coverage_20/centered_abs_mean": 0.09445251375436783, "signal/frontier_coverage_20/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_20/group_std_mean": 0.12184825539588928, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001690700021572411, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001690700021572411, "signal/frontier_coverage_25/centered_abs_mean": 0.05518615916371346, "signal/frontier_coverage_25/group_bin_occupancy": 0.925, "signal/frontier_coverage_25/group_std_mean": 0.07120932638645172, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009878322365693749, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009878322365693749, "signal/frontier_coverage_5/centered_abs_mean": 0.1418927103281021, "signal/frontier_coverage_5/group_bin_occupancy": 0.874609375, "signal/frontier_coverage_5/group_std_mean": 0.18160516917705535, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025398793630301954, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025398793630301954, "signal/frontier_ece_reward/centered_abs_mean": 0.005274960119277239, "signal/frontier_ece_reward/group_bin_occupancy": 0.878515625, "signal/frontier_ece_reward/group_std_mean": 0.006980370450764895, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006593700149096548, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006593700149096548, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28154911994934084, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.733203125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3582507610321045, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035193639993667605, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035193639993667605, "step": 220 }, { "calibration/aurc": 0.23126784031942532, "calibration/batch_distribution_entropy": 0.9914821225669396, "calibration/batch_entropy_100bins": 0.9745556431012833, "calibration/batch_entropy_10bins": 0.9914821225669396, "calibration/batch_entropy_50bins": 0.9834161123379925, "calibration/batch_uniqueness": 0.9553955078125, "calibration/buffer_distribution_entropy": 0.9990940525630616, "calibration/buffer_entropy_100bins": 0.9990462171221427, "calibration/buffer_entropy_10bins": 0.9990940525630616, "calibration/buffer_entropy_50bins": 0.9991333015296009, "calibration/confidence_entropy": 0.4974279364345352, "calibration/coverage@0%": 0.08125, "calibration/coverage@1%": 0.1296875, "calibration/coverage@10%": 0.297265625, "calibration/coverage@15%": 0.3734375, "calibration/coverage@20%": 0.453125, "calibration/coverage@25%": 0.551171875, "calibration/coverage@30%": 0.6421875, "calibration/coverage@5%": 0.225390625, "calibration/ece": 0.13818892162141455, "calibration/mean_confidence": 0.528279888360925, "calibration/prompt_uniqueness": 0.8396484375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 392.4, "completions/max_terminated_length": 392.4, "completions/mean_length": 193.5751953125, "completions/mean_terminated_length": 193.5751953125, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 0.72, "grad_norm": 0.0008994463132694364, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 758875439.0, "reward": 0.867469334602356, "reward_std": 0.08709415346384049, "rewards/accuracy_reward": 0.563671875, "rewards/brier_reward": 0.8105340003967285, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002341646375134587, "rewards/frontier_coverage_1": 0.10286648273468017, "rewards/frontier_coverage_10": 0.10283031612634659, "rewards/frontier_coverage_15": 0.09793300032615662, "rewards/frontier_coverage_20": 0.07525258213281631, "rewards/frontier_coverage_25": 0.05333108454942703, "rewards/frontier_coverage_5": 0.10286374539136886, "rewards/frontier_ece_reward": 0.003090843977406621, "rewards/frontier_entropy_batch_reward": -0.20484532713890075, "signal/accuracy_reward/centered_abs_mean": 0.08983154296875, "signal/accuracy_reward/group_bin_occupancy": 0.1703125, "signal/accuracy_reward/group_std_mean": 0.12185031622648239, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044915771484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044915771484375, "signal/advantage_abs_mean": 0.06714712977409362, "signal/advantage_pre_scale_abs_mean": 0.06714712977409362, "signal/advantage_pre_scale_std": 0.10354482531547546, "signal/advantage_std": 0.10354482531547546, "signal/brier_reward/centered_abs_mean": 0.10361835062503814, "signal/brier_reward/group_bin_occupancy": 0.8609375, "signal/brier_reward/group_std_mean": 0.1339139461517334, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012952293828129768, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012952293828129768, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002101215533912182, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74453125, "signal/frontier_aurc_reward/group_std_mean": 0.0033895236440002917, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7611756488331595e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7611756488331595e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14568218886852263, "signal/frontier_coverage_1/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_1/group_std_mean": 0.18670837283134462, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002607711125165224, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002607711125165224, "signal/frontier_coverage_10/centered_abs_mean": 0.14543514251708983, "signal/frontier_coverage_10/group_bin_occupancy": 0.873828125, "signal/frontier_coverage_10/group_std_mean": 0.1864002525806427, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026032889261841776, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026032889261841776, "signal/frontier_coverage_15/centered_abs_mean": 0.1325514554977417, "signal/frontier_coverage_15/group_bin_occupancy": 0.870703125, "signal/frontier_coverage_15/group_std_mean": 0.17030819058418273, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023726709187030792, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023726709187030792, "signal/frontier_coverage_20/centered_abs_mean": 0.0897199884057045, "signal/frontier_coverage_20/group_bin_occupancy": 0.8765625, "signal/frontier_coverage_20/group_std_mean": 0.11619268357753754, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016059877583757044, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016059877583757044, "signal/frontier_coverage_25/centered_abs_mean": 0.054404760152101515, "signal/frontier_coverage_25/group_bin_occupancy": 0.928515625, "signal/frontier_coverage_25/group_std_mean": 0.06992583870887756, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009738451801240445, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009738451801240445, "signal/frontier_coverage_5/centered_abs_mean": 0.14567132890224457, "signal/frontier_coverage_5/group_bin_occupancy": 0.873046875, "signal/frontier_coverage_5/group_std_mean": 0.18669503033161164, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026075166650116445, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026075166650116445, "signal/frontier_ece_reward/centered_abs_mean": 0.0052463172003626825, "signal/frontier_ece_reward/group_bin_occupancy": 0.901953125, "signal/frontier_ece_reward/group_std_mean": 0.006824824120849371, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006557896500453353, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006557896500453353, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2725887656211853, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.736328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3432928442955017, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03407359570264816, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03407359570264816, "step": 225 }, { "calibration/aurc": 0.251207175508105, "calibration/batch_distribution_entropy": 0.9803575481156237, "calibration/batch_entropy_100bins": 0.96926214899048, "calibration/batch_entropy_10bins": 0.9803575481156237, "calibration/batch_entropy_50bins": 0.9778726337355224, "calibration/batch_uniqueness": 0.9536702349762987, "calibration/buffer_distribution_entropy": 0.9991889846010944, "calibration/buffer_entropy_100bins": 0.9990874643844343, "calibration/buffer_entropy_10bins": 0.9991889846010944, "calibration/buffer_entropy_50bins": 0.9991898926831168, "calibration/confidence_entropy": 0.48241655738057865, "calibration/coverage@0%": 0.022267153864970645, "calibration/coverage@1%": 0.022267153864970645, "calibration/coverage@10%": 0.15632491438356164, "calibration/coverage@15%": 0.21420162671232879, "calibration/coverage@20%": 0.4169658145792564, "calibration/coverage@25%": 0.5560673006360078, "calibration/coverage@30%": 0.6850255320450097, "calibration/coverage@5%": 0.1152718321917808, "calibration/ece": 0.13081064207628812, "calibration/mean_confidence": 0.533058629028637, "calibration/prompt_uniqueness": 0.8375652904689126, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 881.6, "completions/max_terminated_length": 427.8, "completions/mean_length": 198.65859375, "completions/mean_terminated_length": 198.26717834472657, "completions/min_length": 100.8, "completions/min_terminated_length": 100.8, "epoch": 0.736, "grad_norm": 0.0006931371171958745, "learning_rate": 1e-06, "loss": 0.0005, "num_tokens": 775849287.0, "reward": 0.8676259756088257, "reward_std": 0.08426170200109481, "rewards/accuracy_reward": 0.5630859375, "rewards/brier_reward": 0.8032041311264038, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.0026727572083473207, "rewards/frontier_coverage_1": 0.10416304171085358, "rewards/frontier_coverage_10": 0.10409975200891494, "rewards/frontier_coverage_15": 0.09961197376251221, "rewards/frontier_coverage_20": 0.07543607577681541, "rewards/frontier_coverage_25": 0.0561057448387146, "rewards/frontier_coverage_5": 0.10416053682565689, "rewards/frontier_ece_reward": 0.002644325466826558, "rewards/frontier_entropy_batch_reward": -0.19347001612186432, "signal/accuracy_reward/centered_abs_mean": 0.0850830078125, "signal/accuracy_reward/group_bin_occupancy": 0.1671875, "signal/accuracy_reward/group_std_mean": 0.114646577835083, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04254150390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04254150390625, "signal/advantage_abs_mean": 0.06527714878320694, "signal/advantage_pre_scale_abs_mean": 0.06527714878320694, "signal/advantage_pre_scale_std": 0.10004038214683533, "signal/advantage_std": 0.10004038214683533, "signal/brier_reward/centered_abs_mean": 0.10927082747220992, "signal/brier_reward/group_bin_occupancy": 0.831640625, "signal/brier_reward/group_std_mean": 0.14201997220516205, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01365885343402624, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01365885343402624, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023929367307573557, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73125, "signal/frontier_aurc_reward/group_std_mean": 0.0037743649911135433, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.283356502128299e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.283356502128299e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14819615185260773, "signal/frontier_coverage_1/group_bin_occupancy": 0.84921875, "signal/frontier_coverage_1/group_std_mean": 0.19186924695968627, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026527110021561384, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026527110021561384, "signal/frontier_coverage_10/centered_abs_mean": 0.147468763589859, "signal/frontier_coverage_10/group_bin_occupancy": 0.850390625, "signal/frontier_coverage_10/group_std_mean": 0.1909423440694809, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026396906469017267, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026396906469017267, "signal/frontier_coverage_15/centered_abs_mean": 0.1348020002245903, "signal/frontier_coverage_15/group_bin_occupancy": 0.840234375, "signal/frontier_coverage_15/group_std_mean": 0.17472992837429047, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024129556957632305, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024129556957632305, "signal/frontier_coverage_20/centered_abs_mean": 0.08800848871469498, "signal/frontier_coverage_20/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_20/group_std_mean": 0.11463980823755264, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015753519488498568, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015753519488498568, "signal/frontier_coverage_25/centered_abs_mean": 0.055414053797721866, "signal/frontier_coverage_25/group_bin_occupancy": 0.925390625, "signal/frontier_coverage_25/group_std_mean": 0.07170938104391097, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000991911522578448, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000991911522578448, "signal/frontier_coverage_5/centered_abs_mean": 0.14818698167800903, "signal/frontier_coverage_5/group_bin_occupancy": 0.84921875, "signal/frontier_coverage_5/group_std_mean": 0.19185736775398254, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026525467168539763, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026525467168539763, "signal/frontier_ece_reward/centered_abs_mean": 0.005124002322554588, "signal/frontier_ece_reward/group_bin_occupancy": 0.89609375, "signal/frontier_ece_reward/group_std_mean": 0.006701454985886812, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006405002903193235, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006405002903193235, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26818968653678893, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34574413299560547, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033523710817098616, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033523710817098616, "step": 230 }, { "calibration/aurc": 0.2622214479199293, "calibration/batch_distribution_entropy": 0.9764551048609474, "calibration/batch_entropy_100bins": 0.9666018818532802, "calibration/batch_entropy_10bins": 0.9764551048609474, "calibration/batch_entropy_50bins": 0.9763242677062596, "calibration/batch_uniqueness": 0.9524139404296875, "calibration/buffer_distribution_entropy": 0.9991939792089879, "calibration/buffer_entropy_100bins": 0.9990739400539542, "calibration/buffer_entropy_10bins": 0.9991939792089879, "calibration/buffer_entropy_50bins": 0.9991816292648175, "calibration/confidence_entropy": 0.4706510873405473, "calibration/coverage@0%": 0.010546875, "calibration/coverage@1%": 0.010546875, "calibration/coverage@10%": 0.100390625, "calibration/coverage@15%": 0.246484375, "calibration/coverage@20%": 0.447265625, "calibration/coverage@25%": 0.584765625, "calibration/coverage@30%": 0.669140625, "calibration/coverage@5%": 0.03515625, "calibration/ece": 0.11216717108532222, "calibration/mean_confidence": 0.48983524720107646, "calibration/prompt_uniqueness": 0.8369140625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 411.8, "completions/max_terminated_length": 411.8, "completions/mean_length": 198.97177734375, "completions/mean_terminated_length": 198.97177734375, "completions/min_length": 105.6, "completions/min_terminated_length": 105.6, "epoch": 0.752, "grad_norm": 0.0007598252850584686, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 793113958.0, "reward": 0.8673793315887451, "reward_std": 0.0860441878437996, "rewards/accuracy_reward": 0.56337890625, "rewards/brier_reward": 0.8044471979141236, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002977763069793582, "rewards/frontier_coverage_1": 0.10541683062911034, "rewards/frontier_coverage_10": 0.10495719313621521, "rewards/frontier_coverage_15": 0.09651436656713486, "rewards/frontier_coverage_20": 0.07021676413714886, "rewards/frontier_coverage_25": 0.050591808184981345, "rewards/frontier_coverage_5": 0.10540874376893043, "rewards/frontier_ece_reward": 0.0025658421916887166, "rewards/frontier_entropy_batch_reward": -0.1974082589149475, "signal/accuracy_reward/centered_abs_mean": 0.083087158203125, "signal/accuracy_reward/group_bin_occupancy": 0.168359375, "signal/accuracy_reward/group_std_mean": 0.11437420845031739, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0415435791015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0415435791015625, "signal/advantage_abs_mean": 0.06670793667435646, "signal/advantage_pre_scale_abs_mean": 0.06670793667435646, "signal/advantage_pre_scale_std": 0.10405694842338561, "signal/advantage_std": 0.10405694842338561, "signal/brier_reward/centered_abs_mean": 0.1071990892291069, "signal/brier_reward/group_bin_occupancy": 0.837890625, "signal/brier_reward/group_std_mean": 0.13994504809379577, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013399886153638362, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013399886153638362, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.00265240459702909, "signal/frontier_aurc_reward/group_bin_occupancy": 0.725390625, "signal/frontier_aurc_reward/group_std_mean": 0.004353985376656056, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7478038322879004e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7478038322879004e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14081787765026094, "signal/frontier_coverage_1/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_1/group_std_mean": 0.18302632570266725, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025206399615854023, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025206399615854023, "signal/frontier_coverage_10/centered_abs_mean": 0.1402135133743286, "signal/frontier_coverage_10/group_bin_occupancy": 0.86953125, "signal/frontier_coverage_10/group_std_mean": 0.1822360187768936, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002509821904823184, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002509821904823184, "signal/frontier_coverage_15/centered_abs_mean": 0.12741587162017823, "signal/frontier_coverage_15/group_bin_occupancy": 0.865625, "signal/frontier_coverage_15/group_std_mean": 0.16580144464969634, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022807438392192124, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022807438392192124, "signal/frontier_coverage_20/centered_abs_mean": 0.08223778158426284, "signal/frontier_coverage_20/group_bin_occupancy": 0.875, "signal/frontier_coverage_20/group_std_mean": 0.10763536989688874, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014720562612637877, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014720562612637877, "signal/frontier_coverage_25/centered_abs_mean": 0.053284359723329545, "signal/frontier_coverage_25/group_bin_occupancy": 0.91953125, "signal/frontier_coverage_25/group_std_mean": 0.06900968700647354, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009537900099530816, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009537900099530816, "signal/frontier_coverage_5/centered_abs_mean": 0.1408083975315094, "signal/frontier_coverage_5/group_bin_occupancy": 0.869140625, "signal/frontier_coverage_5/group_std_mean": 0.18301377892494203, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025204701349139215, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025204701349139215, "signal/frontier_ece_reward/centered_abs_mean": 0.005157566629350185, "signal/frontier_ece_reward/group_bin_occupancy": 0.89921875, "signal/frontier_ece_reward/group_std_mean": 0.006740899570286274, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006446958286687732, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006446958286687732, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2672864556312561, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3404460310935974, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03341080695390701, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03341080695390701, "step": 235 }, { "calibration/aurc": 0.2681903946291124, "calibration/batch_distribution_entropy": 0.9767026425312034, "calibration/batch_entropy_100bins": 0.9688933764667013, "calibration/batch_entropy_10bins": 0.9767026425312034, "calibration/batch_entropy_50bins": 0.9750379284339221, "calibration/batch_uniqueness": 0.9525848388671875, "calibration/buffer_distribution_entropy": 0.999199839210462, "calibration/buffer_entropy_100bins": 0.9990462741781064, "calibration/buffer_entropy_10bins": 0.999199839210462, "calibration/buffer_entropy_50bins": 0.9991680034614012, "calibration/confidence_entropy": 0.5025959777866486, "calibration/coverage@0%": 0.042578125, "calibration/coverage@1%": 0.061328125, "calibration/coverage@10%": 0.226171875, "calibration/coverage@15%": 0.2875, "calibration/coverage@20%": 0.3734375, "calibration/coverage@25%": 0.491015625, "calibration/coverage@30%": 0.584765625, "calibration/coverage@5%": 0.177734375, "calibration/ece": 0.14313831918485717, "calibration/mean_confidence": 0.4804080079299814, "calibration/prompt_uniqueness": 0.8513671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 403.4, "completions/max_terminated_length": 403.4, "completions/mean_length": 200.05576171875, "completions/mean_terminated_length": 200.05576171875, "completions/min_length": 102.2, "completions/min_terminated_length": 102.2, "epoch": 0.768, "grad_norm": 0.0011492387857288122, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 810095233.0, "reward": 0.8437476992607117, "reward_std": 0.08492105603218078, "rewards/accuracy_reward": 0.50966796875, "rewards/brier_reward": 0.810984981060028, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002536864671856165, "rewards/frontier_coverage_1": 0.14460064321756363, "rewards/frontier_coverage_10": 0.14430496394634246, "rewards/frontier_coverage_15": 0.13622619807720185, "rewards/frontier_coverage_20": 0.0965993657708168, "rewards/frontier_coverage_25": 0.05859274864196777, "rewards/frontier_coverage_5": 0.14456866830587387, "rewards/frontier_ece_reward": 0.002746673859655857, "rewards/frontier_entropy_batch_reward": -0.20547258853912354, "signal/accuracy_reward/centered_abs_mean": 0.081915283203125, "signal/accuracy_reward/group_bin_occupancy": 0.166015625, "signal/accuracy_reward/group_std_mean": 0.11158370226621628, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409576416015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0409576416015625, "signal/advantage_abs_mean": 0.06546520814299583, "signal/advantage_pre_scale_abs_mean": 0.06546520814299583, "signal/advantage_pre_scale_std": 0.10177824050188064, "signal/advantage_std": 0.10177824050188064, "signal/brier_reward/centered_abs_mean": 0.10417567193508148, "signal/brier_reward/group_bin_occupancy": 0.845703125, "signal/brier_reward/group_std_mean": 0.13512639403343202, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013021958991885185, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013021958991885185, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020107618300244214, "signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625, "signal/frontier_aurc_reward/group_std_mean": 0.003275436395779252, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.599263691285159e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.599263691285159e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14825652539730072, "signal/frontier_coverage_1/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_1/group_std_mean": 0.19205498099327087, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002653791708871722, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002653791708871722, "signal/frontier_coverage_10/centered_abs_mean": 0.14757258892059327, "signal/frontier_coverage_10/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_10/group_std_mean": 0.19117499589920045, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002641549287363887, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002641549287363887, "signal/frontier_coverage_15/centered_abs_mean": 0.13492438793182374, "signal/frontier_coverage_15/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_15/group_std_mean": 0.1748009592294693, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024151464458554983, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024151464458554983, "signal/frontier_coverage_20/centered_abs_mean": 0.0868727594614029, "signal/frontier_coverage_20/group_bin_occupancy": 0.88203125, "signal/frontier_coverage_20/group_std_mean": 0.11287190318107605, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015550222946330906, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015550222946330906, "signal/frontier_coverage_25/centered_abs_mean": 0.05342138335108757, "signal/frontier_coverage_25/group_bin_occupancy": 0.926171875, "signal/frontier_coverage_25/group_std_mean": 0.06843771934509277, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009562427527271211, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009562427527271211, "signal/frontier_coverage_5/centered_abs_mean": 0.1482018768787384, "signal/frontier_coverage_5/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_5/group_std_mean": 0.1919853150844574, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026528135407716037, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026528135407716037, "signal/frontier_ece_reward/centered_abs_mean": 0.004641291126608849, "signal/frontier_ece_reward/group_bin_occupancy": 0.895703125, "signal/frontier_ece_reward/group_std_mean": 0.006130393128842115, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005801613908261061, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005801613908261061, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2680731534957886, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34347763657569885, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03350914418697357, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03350914418697357, "step": 240 }, { "calibration/aurc": 0.3410998255186805, "calibration/batch_distribution_entropy": 0.98266861992294, "calibration/batch_entropy_100bins": 0.9725637560170315, "calibration/batch_entropy_10bins": 0.98266861992294, "calibration/batch_entropy_50bins": 0.9805054265093982, "calibration/batch_uniqueness": 0.9535858154296875, "calibration/buffer_distribution_entropy": 0.9991640665932143, "calibration/buffer_entropy_100bins": 0.9990458872496693, "calibration/buffer_entropy_10bins": 0.9991640665932143, "calibration/buffer_entropy_50bins": 0.9991495337630607, "calibration/confidence_entropy": 0.47953550982848137, "calibration/coverage@0%": 0.02890625, "calibration/coverage@1%": 0.04296875, "calibration/coverage@10%": 0.2109375, "calibration/coverage@15%": 0.2875, "calibration/coverage@20%": 0.32265625, "calibration/coverage@25%": 0.34765625, "calibration/coverage@30%": 0.365625, "calibration/coverage@5%": 0.098828125, "calibration/ece": 0.16965724304332572, "calibration/mean_confidence": 0.4929179841997679, "calibration/prompt_uniqueness": 0.83154296875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 863.4, "completions/max_terminated_length": 431.8, "completions/mean_length": 197.94599609375, "completions/mean_terminated_length": 197.5545166015625, "completions/min_length": 103.2, "completions/min_terminated_length": 103.2, "epoch": 0.784, "grad_norm": 0.0008992942166514695, "learning_rate": 1e-06, "loss": 0.0007, "num_tokens": 827296568.0, "reward": 0.860960865020752, "reward_std": 0.08637812584638596, "rewards/accuracy_reward": 0.55595703125, "rewards/brier_reward": 0.7818328976631165, "rewards/format_reward": 0.99970703125, "rewards/frontier_aurc_reward": -0.002828824752941728, "rewards/frontier_coverage_1": 0.08855738416314125, "rewards/frontier_coverage_10": 0.08812275156378746, "rewards/frontier_coverage_15": 0.08236327841877937, "rewards/frontier_coverage_20": 0.059821216762065886, "rewards/frontier_coverage_25": 0.043670283257961275, "rewards/frontier_coverage_5": 0.08851732909679413, "rewards/frontier_ece_reward": 0.0014453153213253244, "rewards/frontier_entropy_batch_reward": -0.18243320286273956, "signal/accuracy_reward/centered_abs_mean": 0.097711181640625, "signal/accuracy_reward/group_bin_occupancy": 0.169140625, "signal/accuracy_reward/group_std_mean": 0.12649333626031875, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0488555908203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0488555908203125, "signal/advantage_abs_mean": 0.06811224520206452, "signal/advantage_pre_scale_abs_mean": 0.06811224520206452, "signal/advantage_pre_scale_std": 0.10434643775224686, "signal/advantage_std": 0.10434643775224686, "signal/brier_reward/centered_abs_mean": 0.11582219302654266, "signal/brier_reward/group_bin_occupancy": 0.84453125, "signal/brier_reward/group_std_mean": 0.14920888543128968, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014477774128317833, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014477774128317833, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_bin_occupancy": 0.126171875, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_aurc_reward/centered_abs_mean": 0.00234157289378345, "signal/frontier_aurc_reward/group_bin_occupancy": 0.741796875, "signal/frontier_aurc_reward/group_std_mean": 0.00370767368003726, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.19141557358671e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.19141557358671e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1641725480556488, "signal/frontier_coverage_1/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_1/group_std_mean": 0.21050458252429963, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002938688499853015, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002938688499853015, "signal/frontier_coverage_10/centered_abs_mean": 0.16328320205211638, "signal/frontier_coverage_10/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_10/group_std_mean": 0.20939250588417052, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029227692633867265, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029227692633867265, "signal/frontier_coverage_15/centered_abs_mean": 0.1482144132256508, "signal/frontier_coverage_15/group_bin_occupancy": 0.848828125, "signal/frontier_coverage_15/group_std_mean": 0.1904875546693802, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002653037803247571, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002653037803247571, "signal/frontier_coverage_20/centered_abs_mean": 0.09080570191144943, "signal/frontier_coverage_20/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_20/group_std_mean": 0.11784365773200989, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016254220623522996, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016254220623522996, "signal/frontier_coverage_25/centered_abs_mean": 0.05480258762836456, "signal/frontier_coverage_25/group_bin_occupancy": 0.91015625, "signal/frontier_coverage_25/group_std_mean": 0.07086438089609146, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000980966305360198, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000980966305360198, "signal/frontier_coverage_5/centered_abs_mean": 0.16410693824291228, "signal/frontier_coverage_5/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_5/group_std_mean": 0.21042270064353943, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029375139623880387, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029375139623880387, "signal/frontier_ece_reward/centered_abs_mean": 0.004935114085674286, "signal/frontier_ece_reward/group_bin_occupancy": 0.89140625, "signal/frontier_ece_reward/group_std_mean": 0.006519688945263624, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006168892607092858, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006168892607092858, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24725628197193145, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3199459671974182, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03090703524649143, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03090703524649143, "step": 245 }, { "calibration/aurc": 0.1940784264652428, "calibration/batch_distribution_entropy": 0.9770675388500825, "calibration/batch_entropy_100bins": 0.9662281299714202, "calibration/batch_entropy_10bins": 0.9770675388500825, "calibration/batch_entropy_50bins": 0.9742705536997672, "calibration/batch_uniqueness": 0.9523101806640625, "calibration/buffer_distribution_entropy": 0.999172449019247, "calibration/buffer_entropy_100bins": 0.9990648204449872, "calibration/buffer_entropy_10bins": 0.999172449019247, "calibration/buffer_entropy_50bins": 0.9991687135590434, "calibration/confidence_entropy": 0.4846538428188453, "calibration/coverage@0%": 0.0546875, "calibration/coverage@1%": 0.0546875, "calibration/coverage@10%": 0.346484375, "calibration/coverage@15%": 0.489453125, "calibration/coverage@20%": 0.575, "calibration/coverage@25%": 0.65234375, "calibration/coverage@30%": 0.7375, "calibration/coverage@5%": 0.1671875, "calibration/ece": 0.10917596226614659, "calibration/mean_confidence": 0.4966383387592807, "calibration/prompt_uniqueness": 0.8314453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 653.8, "completions/max_terminated_length": 424.2, "completions/mean_length": 195.6087890625, "completions/mean_terminated_length": 195.47777709960937, "completions/min_length": 104.2, "completions/min_terminated_length": 104.2, "epoch": 0.8, "grad_norm": 0.0010258163092657924, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 844310162.0, "reward": 0.8748032927513123, "reward_std": 0.08496512919664383, "rewards/accuracy_reward": 0.58251953125, "rewards/brier_reward": 0.8144230008125305, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002836526231840253, "rewards/frontier_coverage_1": 0.10128066837787628, "rewards/frontier_coverage_10": 0.10073070526123047, "rewards/frontier_coverage_15": 0.09388678222894668, "rewards/frontier_coverage_20": 0.06677651032805443, "rewards/frontier_coverage_25": 0.05463530197739601, "rewards/frontier_coverage_5": 0.10115833282470703, "rewards/frontier_ece_reward": 0.002541623217985034, "rewards/frontier_entropy_batch_reward": -0.2220643639564514, "signal/accuracy_reward/centered_abs_mean": 0.084588623046875, "signal/accuracy_reward/group_bin_occupancy": 0.16484375, "signal/accuracy_reward/group_std_mean": 0.11226904094219207, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422943115234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0422943115234375, "signal/advantage_abs_mean": 0.06675532534718513, "signal/advantage_pre_scale_abs_mean": 0.06675532534718513, "signal/advantage_pre_scale_std": 0.10448435842990875, "signal/advantage_std": 0.10448435842990875, "signal/brier_reward/centered_abs_mean": 0.10213624089956283, "signal/brier_reward/group_bin_occupancy": 0.83828125, "signal/brier_reward/group_std_mean": 0.1328400731086731, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012767030112445354, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012767030112445354, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002544344821944833, "signal/frontier_aurc_reward/group_bin_occupancy": 0.728125, "signal/frontier_aurc_reward/group_std_mean": 0.003990656137466431, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.554377155727707e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.554377155727707e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1359323427081108, "signal/frontier_coverage_1/group_bin_occupancy": 0.841796875, "signal/frontier_coverage_1/group_std_mean": 0.17886653840541838, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002433188818395138, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002433188818395138, "signal/frontier_coverage_10/centered_abs_mean": 0.1348055586218834, "signal/frontier_coverage_10/group_bin_occupancy": 0.841796875, "signal/frontier_coverage_10/group_std_mean": 0.17739444077014924, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024130194447934627, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024130194447934627, "signal/frontier_coverage_15/centered_abs_mean": 0.1202843114733696, "signal/frontier_coverage_15/group_bin_occupancy": 0.83515625, "signal/frontier_coverage_15/group_std_mean": 0.15877383649349214, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021530891302973033, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021530891302973033, "signal/frontier_coverage_20/centered_abs_mean": 0.0718239963054657, "signal/frontier_coverage_20/group_bin_occupancy": 0.85625, "signal/frontier_coverage_20/group_std_mean": 0.09572341293096542, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012856494868174195, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012856494868174195, "signal/frontier_coverage_25/centered_abs_mean": 0.051678837090730664, "signal/frontier_coverage_25/group_bin_occupancy": 0.923046875, "signal/frontier_coverage_25/group_std_mean": 0.06689032912254333, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009250511298887432, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009250511298887432, "signal/frontier_coverage_5/centered_abs_mean": 0.13572666347026824, "signal/frontier_coverage_5/group_bin_occupancy": 0.842578125, "signal/frontier_coverage_5/group_std_mean": 0.17859258353710175, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024295071605592968, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024295071605592968, "signal/frontier_ece_reward/centered_abs_mean": 0.005082287080585957, "signal/frontier_ece_reward/group_bin_occupancy": 0.8890625, "signal/frontier_ece_reward/group_std_mean": 0.006694659031927586, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006352858850732446, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006352858850732446, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2741507351398468, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73828125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34453503489494325, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03426884189248085, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03426884189248085, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4024781794204561, "eval_calibration/batch_distribution_entropy": 0.9411803508728845, "eval_calibration/batch_entropy_100bins": 0.722578085023462, "eval_calibration/batch_entropy_10bins": 0.9411803508728845, "eval_calibration/batch_entropy_50bins": 0.798684377132812, "eval_calibration/batch_uniqueness": 0.8974609375, "eval_calibration/buffer_distribution_entropy": 0.9992212146130883, "eval_calibration/buffer_entropy_100bins": 0.9990995964084779, "eval_calibration/buffer_entropy_10bins": 0.9992212146130883, "eval_calibration/buffer_entropy_50bins": 0.9992052314450008, "eval_calibration/confidence_entropy": 0.4720443419968122, "eval_calibration/coverage@0%": 0.0703125, "eval_calibration/coverage@1%": 0.0703125, "eval_calibration/coverage@10%": 0.0703125, "eval_calibration/coverage@15%": 0.203125, "eval_calibration/coverage@20%": 0.2421875, "eval_calibration/coverage@25%": 0.3125, "eval_calibration/coverage@30%": 0.34375, "eval_calibration/coverage@5%": 0.0703125, "eval_calibration/ece": 0.17127804387863715, "eval_calibration/mean_confidence": 0.4861035242434224, "eval_calibration/prompt_uniqueness": 0.8974609375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 359.75, "eval_completions/max_terminated_length": 359.75, "eval_completions/mean_length": 197.09428787231445, "eval_completions/mean_terminated_length": 197.09428787231445, "eval_completions/min_length": 126.0, "eval_completions/min_terminated_length": 126.0, "eval_loss": 0.0, "eval_num_tokens": 844310162.0, "eval_reward": 0.7075212150812149, "eval_reward_std": 0.23280686885118484, "eval_rewards/accuracy_reward": 0.4296875, "eval_rewards/brier_reward": 0.8027084320783615, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.0038928079302422702, "eval_rewards/frontier_coverage_1": 0.200554970651865, "eval_rewards/frontier_coverage_10": 0.19824624806642532, "eval_rewards/frontier_coverage_15": 0.17718525603413582, "eval_rewards/frontier_coverage_20": 0.10994750820100307, "eval_rewards/frontier_coverage_25": 0.06260389927774668, "eval_rewards/frontier_coverage_5": 0.19991321116685867, "eval_rewards/frontier_ece_reward": 0.003450465912465006, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 19.3003, "eval_samples_per_second": 25.906, "eval_signal/accuracy_reward/centered_abs_mean": 0.4765625, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.49562519043684006, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23828125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23828125, "eval_signal/advantage_abs_mean": 0.21785810217261314, "eval_signal/advantage_pre_scale_abs_mean": 0.21785810217261314, "eval_signal/advantage_pre_scale_std": 0.23014385625720024, "eval_signal/advantage_std": 0.23014385625720024, "eval_signal/brier_reward/centered_abs_mean": 0.1881571188569069, "eval_signal/brier_reward/group_bin_occupancy": 0.875, "eval_signal/brier_reward/group_std_mean": 0.23912940546870232, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02351963985711336, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02351963985711336, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005338445422239602, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6328125, "eval_signal/frontier_aurc_reward/group_std_mean": 0.009567599976435304, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.555817086948082e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.555817086948082e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.348846860229969, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_1/group_std_mean": 0.42089004069566727, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006244358723051846, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006244358723051846, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3454489931464195, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_10/group_std_mean": 0.4168899804353714, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006183536606840789, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006183536606840789, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.30945945531129837, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375, "eval_signal/frontier_coverage_15/group_std_mean": 0.37479550391435623, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005539324251003563, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005539324251003563, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.1743907555937767, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_20/group_std_mean": 0.2171802930533886, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031215944909490645, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031215944909490645, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0898975171148777, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875, "eval_signal/frontier_coverage_25/group_std_mean": 0.11114342510700226, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016091655124910176, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016091655124910176, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.34793129563331604, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875, "eval_signal/frontier_coverage_5/group_std_mean": 0.41981156170368195, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006227970006875694, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006227970006875694, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.006496628629975021, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9609375, "eval_signal/frontier_ece_reward/group_std_mean": 0.008258524350821972, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008120785787468776, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008120785787468776, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.207, "step": 250 }, { "calibration/aurc": 0.20885883826229318, "calibration/batch_distribution_entropy": 0.9617354323768474, "calibration/batch_entropy_100bins": 0.9607130781173305, "calibration/batch_entropy_10bins": 0.9617354323768474, "calibration/batch_entropy_50bins": 0.9659121941892688, "calibration/batch_uniqueness": 0.9494836297596694, "calibration/buffer_distribution_entropy": 0.999124191511771, "calibration/buffer_entropy_100bins": 0.999041988564324, "calibration/buffer_entropy_10bins": 0.999124191511771, "calibration/buffer_entropy_50bins": 0.9991497628306819, "calibration/confidence_entropy": 0.46736913978754613, "calibration/coverage@0%": 0.043359375, "calibration/coverage@1%": 0.043359375, "calibration/coverage@10%": 0.171484375, "calibration/coverage@15%": 0.28758408757338555, "calibration/coverage@20%": 0.557930987035225, "calibration/coverage@25%": 0.7530630809686889, "calibration/coverage@30%": 0.8226279659980431, "calibration/coverage@5%": 0.102734375, "calibration/ece": 0.14049068595229333, "calibration/mean_confidence": 0.523476685252264, "calibration/prompt_uniqueness": 0.8333644458084027, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 467.6, "completions/max_terminated_length": 467.6, "completions/mean_length": 194.86865234375, "completions/mean_terminated_length": 194.86865234375, "completions/min_length": 103.8, "completions/min_terminated_length": 103.8, "epoch": 0.816, "grad_norm": 0.001114765414968133, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 861404785.0, "reward": 0.8725868582725524, "reward_std": 0.08807137310504913, "rewards/accuracy_reward": 0.5794921875, "rewards/brier_reward": 0.8018252968788147, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002854443807154894, "rewards/frontier_coverage_1": 0.08887268304824829, "rewards/frontier_coverage_10": 0.08834582418203354, "rewards/frontier_coverage_15": 0.08466917127370835, "rewards/frontier_coverage_20": 0.060265733301639555, "rewards/frontier_coverage_25": 0.051320061832666394, "rewards/frontier_coverage_5": 0.08884882032871247, "rewards/frontier_ece_reward": 0.0020072998944669963, "rewards/frontier_entropy_batch_reward": -0.2065118134021759, "signal/accuracy_reward/centered_abs_mean": 0.09168701171875, "signal/accuracy_reward/group_bin_occupancy": 0.170703125, "signal/accuracy_reward/group_std_mean": 0.12381611913442611, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045843505859375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.045843505859375, "signal/advantage_abs_mean": 0.06842098832130432, "signal/advantage_pre_scale_abs_mean": 0.06842098832130432, "signal/advantage_pre_scale_std": 0.10695004910230636, "signal/advantage_std": 0.10695004910230636, "signal/brier_reward/centered_abs_mean": 0.10891520380973815, "signal/brier_reward/group_bin_occupancy": 0.833203125, "signal/brier_reward/group_std_mean": 0.14026750177145003, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01361440047621727, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01361440047621727, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002719699405133724, "signal/frontier_aurc_reward/group_bin_occupancy": 0.71875, "signal/frontier_aurc_reward/group_std_mean": 0.00452100308611989, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8682617489248514e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8682617489248514e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14558494091033936, "signal/frontier_coverage_1/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_1/group_std_mean": 0.18775410056114197, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026059703435748816, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026059703435748816, "signal/frontier_coverage_10/centered_abs_mean": 0.14347892701625825, "signal/frontier_coverage_10/group_bin_occupancy": 0.858203125, "signal/frontier_coverage_10/group_std_mean": 0.1850330114364624, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025682727340608836, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025682727340608836, "signal/frontier_coverage_15/centered_abs_mean": 0.12778309732675552, "signal/frontier_coverage_15/group_bin_occupancy": 0.85, "signal/frontier_coverage_15/group_std_mean": 0.16491487622261047, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002287317393347621, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002287317393347621, "signal/frontier_coverage_20/centered_abs_mean": 0.07579994648694992, "signal/frontier_coverage_20/group_bin_occupancy": 0.884375, "signal/frontier_coverage_20/group_std_mean": 0.09820334166288376, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013568190392106772, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013568190392106772, "signal/frontier_coverage_25/centered_abs_mean": 0.05239018201828003, "signal/frontier_coverage_25/group_bin_occupancy": 0.92109375, "signal/frontier_coverage_25/group_std_mean": 0.06699474751949311, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009377842419780791, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009377842419780791, "signal/frontier_coverage_5/centered_abs_mean": 0.14545360803604127, "signal/frontier_coverage_5/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_5/group_std_mean": 0.18758580982685089, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002603619499132037, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002603619499132037, "signal/frontier_ece_reward/centered_abs_mean": 0.004751469660550356, "signal/frontier_ece_reward/group_bin_occupancy": 0.9015625, "signal/frontier_ece_reward/group_std_mean": 0.006205685343593359, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005939337075687945, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005939337075687945, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.265206840634346, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730078125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33529953956604003, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03315085507929325, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03315085507929325, "step": 255 }, { "calibration/aurc": 0.26691739801367803, "calibration/batch_distribution_entropy": 0.9774445841742269, "calibration/batch_entropy_100bins": 0.9696266513555238, "calibration/batch_entropy_10bins": 0.9774445841742269, "calibration/batch_entropy_50bins": 0.9781202652767531, "calibration/batch_uniqueness": 0.9535858154296875, "calibration/buffer_distribution_entropy": 0.9989725991598203, "calibration/buffer_entropy_100bins": 0.9989539238306406, "calibration/buffer_entropy_10bins": 0.9989725991598203, "calibration/buffer_entropy_50bins": 0.9990613562739397, "calibration/confidence_entropy": 0.4988719882240712, "calibration/coverage@0%": 0.03828125, "calibration/coverage@1%": 0.03828125, "calibration/coverage@10%": 0.25078125, "calibration/coverage@15%": 0.294921875, "calibration/coverage@20%": 0.372265625, "calibration/coverage@25%": 0.455859375, "calibration/coverage@30%": 0.5828125, "calibration/coverage@5%": 0.15078125, "calibration/ece": 0.10967759805603246, "calibration/mean_confidence": 0.48722149702527523, "calibration/prompt_uniqueness": 0.846044921875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.4, "completions/max_terminated_length": 442.4, "completions/mean_length": 196.14306640625, "completions/mean_terminated_length": 196.14306640625, "completions/min_length": 103.2, "completions/min_terminated_length": 103.2, "epoch": 0.832, "grad_norm": 0.0010814516572281718, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 878421642.0, "reward": 0.8648443460464478, "reward_std": 0.08550989478826523, "rewards/accuracy_reward": 0.55498046875, "rewards/brier_reward": 0.8163813591003418, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0023734794463962316, "rewards/frontier_coverage_1": 0.11868036091327668, "rewards/frontier_coverage_10": 0.11617294400930404, "rewards/frontier_coverage_15": 0.10371433347463607, "rewards/frontier_coverage_20": 0.07033977434039115, "rewards/frontier_coverage_25": 0.0553276963531971, "rewards/frontier_coverage_5": 0.11858219057321548, "rewards/frontier_ece_reward": 0.0023112162714824082, "rewards/frontier_entropy_batch_reward": -0.20258863270282745, "signal/accuracy_reward/centered_abs_mean": 0.090997314453125, "signal/accuracy_reward/group_bin_occupancy": 0.169140625, "signal/accuracy_reward/group_std_mean": 0.12111333757638931, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0454986572265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0454986572265625, "signal/advantage_abs_mean": 0.06704051047563553, "signal/advantage_pre_scale_abs_mean": 0.06704051047563553, "signal/advantage_pre_scale_std": 0.10539929419755936, "signal/advantage_std": 0.10539929419755936, "signal/brier_reward/centered_abs_mean": 0.10057551860809326, "signal/brier_reward/group_bin_occupancy": 0.841796875, "signal/brier_reward/group_std_mean": 0.13067585229873657, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012571939826011657, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012571939826011657, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020601370837539435, "signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625, "signal/frontier_aurc_reward/group_std_mean": 0.0032557172700762747, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.687645366881043e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.687645366881043e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14384538531303406, "signal/frontier_coverage_1/group_bin_occupancy": 0.858984375, "signal/frontier_coverage_1/group_std_mean": 0.18559444546699524, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025748323649168016, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025748323649168016, "signal/frontier_coverage_10/centered_abs_mean": 0.14038788378238679, "signal/frontier_coverage_10/group_bin_occupancy": 0.85859375, "signal/frontier_coverage_10/group_std_mean": 0.18115375339984893, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025129430461674927, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025129430461674927, "signal/frontier_coverage_15/centered_abs_mean": 0.12292735427618026, "signal/frontier_coverage_15/group_bin_occupancy": 0.855859375, "signal/frontier_coverage_15/group_std_mean": 0.15857858061790467, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022003995720297096, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022003995720297096, "signal/frontier_coverage_20/centered_abs_mean": 0.07247701585292816, "signal/frontier_coverage_20/group_bin_occupancy": 0.891015625, "signal/frontier_coverage_20/group_std_mean": 0.09377783834934235, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012973385397344827, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012973385397344827, "signal/frontier_coverage_25/centered_abs_mean": 0.05139257907867432, "signal/frontier_coverage_25/group_bin_occupancy": 0.932421875, "signal/frontier_coverage_25/group_std_mean": 0.06542427986860275, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000919927132781595, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000919927132781595, "signal/frontier_coverage_5/centered_abs_mean": 0.14371106922626495, "signal/frontier_coverage_5/group_bin_occupancy": 0.85859375, "signal/frontier_coverage_5/group_std_mean": 0.1854223281145096, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025724280625581742, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025724280625581742, "signal/frontier_ece_reward/centered_abs_mean": 0.004667305201292038, "signal/frontier_ece_reward/group_bin_occupancy": 0.894921875, "signal/frontier_ece_reward/group_std_mean": 0.006094491388648748, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005834131501615047, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005834131501615047, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2624901086091995, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7328125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3327677011489868, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03281126357614994, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03281126357614994, "step": 260 }, { "calibration/aurc": 0.31542229607064637, "calibration/batch_distribution_entropy": 0.9739707310173376, "calibration/batch_entropy_100bins": 0.9635504948025592, "calibration/batch_entropy_10bins": 0.9739707310173376, "calibration/batch_entropy_50bins": 0.9730236720183898, "calibration/batch_uniqueness": 0.951666259765625, "calibration/buffer_distribution_entropy": 0.9989930255604248, "calibration/buffer_entropy_100bins": 0.9989505577332609, "calibration/buffer_entropy_10bins": 0.9989930255604248, "calibration/buffer_entropy_50bins": 0.999068960960001, "calibration/confidence_entropy": 0.5086728375442844, "calibration/coverage@0%": 0.0296875, "calibration/coverage@1%": 0.0296875, "calibration/coverage@10%": 0.120703125, "calibration/coverage@15%": 0.2734375, "calibration/coverage@20%": 0.417578125, "calibration/coverage@25%": 0.491796875, "calibration/coverage@30%": 0.5453125, "calibration/coverage@5%": 0.102734375, "calibration/ece": 0.1579181461275594, "calibration/mean_confidence": 0.5313260932295083, "calibration/prompt_uniqueness": 0.837255859375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 421.6, "completions/max_terminated_length": 421.6, "completions/mean_length": 194.3615234375, "completions/mean_terminated_length": 194.3615234375, "completions/min_length": 101.4, "completions/min_terminated_length": 101.4, "epoch": 0.848, "grad_norm": 0.0008651363314129412, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 895426272.0, "reward": 0.8523078799247742, "reward_std": 0.08377386629581451, "rewards/accuracy_reward": 0.53427734375, "rewards/brier_reward": 0.8082629799842834, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.002553269104100764, "rewards/frontier_coverage_1": 0.11806271076202393, "rewards/frontier_coverage_10": 0.11662331819534302, "rewards/frontier_coverage_15": 0.10539236664772034, "rewards/frontier_coverage_20": 0.06768357157707214, "rewards/frontier_coverage_25": 0.04773269593715668, "rewards/frontier_coverage_5": 0.1179862841963768, "rewards/frontier_ece_reward": 0.0020762649830430744, "rewards/frontier_entropy_batch_reward": -0.21074254512786866, "signal/accuracy_reward/centered_abs_mean": 0.076470947265625, "signal/accuracy_reward/group_bin_occupancy": 0.165625, "signal/accuracy_reward/group_std_mean": 0.1066226527094841, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0382354736328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0382354736328125, "signal/advantage_abs_mean": 0.06452079713344575, "signal/advantage_pre_scale_abs_mean": 0.06452079713344575, "signal/advantage_pre_scale_std": 0.10078646242618561, "signal/advantage_std": 0.10078646242618561, "signal/brier_reward/centered_abs_mean": 0.095879465341568, "signal/brier_reward/group_bin_occupancy": 0.86015625, "signal/brier_reward/group_std_mean": 0.12415737211704254, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011984933167696, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.011984933167696, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.002056886232458055, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73671875, "signal/frontier_aurc_reward/group_std_mean": 0.0032547391252592205, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.681826237880159e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.681826237880159e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.13201110661029816, "signal/frontier_coverage_1/group_bin_occupancy": 0.875, "signal/frontier_coverage_1/group_std_mean": 0.1731318861246109, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023629988078027963, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023629988078027963, "signal/frontier_coverage_10/centered_abs_mean": 0.12971103489398955, "signal/frontier_coverage_10/group_bin_occupancy": 0.872265625, "signal/frontier_coverage_10/group_std_mean": 0.17014427483081818, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00232182745821774, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00232182745821774, "signal/frontier_coverage_15/centered_abs_mean": 0.11497683823108673, "signal/frontier_coverage_15/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_15/group_std_mean": 0.15072887837886811, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002058085426688194, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002058085426688194, "signal/frontier_coverage_20/centered_abs_mean": 0.06714669689536094, "signal/frontier_coverage_20/group_bin_occupancy": 0.9, "signal/frontier_coverage_20/group_std_mean": 0.08797992616891862, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001201925822533667, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001201925822533667, "signal/frontier_coverage_25/centered_abs_mean": 0.04629442393779755, "signal/frontier_coverage_25/group_bin_occupancy": 0.921484375, "signal/frontier_coverage_25/group_std_mean": 0.059931250661611556, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008286701398901641, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008286701398901641, "signal/frontier_coverage_5/centered_abs_mean": 0.1318788543343544, "signal/frontier_coverage_5/group_bin_occupancy": 0.875, "signal/frontier_coverage_5/group_std_mean": 0.17296003997325898, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023606313858181237, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023606313858181237, "signal/frontier_ece_reward/centered_abs_mean": 0.004494541138410568, "signal/frontier_ece_reward/group_bin_occupancy": 0.894140625, "signal/frontier_ece_reward/group_std_mean": 0.0058809550479054454, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000561817642301321, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000561817642301321, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.279247921705246, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3494983911514282, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03490599021315575, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03490599021315575, "step": 265 }, { "calibration/aurc": 0.2551935935751549, "calibration/batch_distribution_entropy": 0.9551683899262354, "calibration/batch_entropy_100bins": 0.9561740490407103, "calibration/batch_entropy_10bins": 0.9551683899262354, "calibration/batch_entropy_50bins": 0.9628317632115422, "calibration/batch_uniqueness": 0.9490325927734375, "calibration/buffer_distribution_entropy": 0.9990927517675319, "calibration/buffer_entropy_100bins": 0.9990242562445925, "calibration/buffer_entropy_10bins": 0.9990927517675319, "calibration/buffer_entropy_50bins": 0.9991437917105215, "calibration/confidence_entropy": 0.49864896977334033, "calibration/coverage@0%": 0.034375, "calibration/coverage@1%": 0.034375, "calibration/coverage@10%": 0.194921875, "calibration/coverage@15%": 0.2421875, "calibration/coverage@20%": 0.34765625, "calibration/coverage@25%": 0.444140625, "calibration/coverage@30%": 0.575, "calibration/coverage@5%": 0.108203125, "calibration/ece": 0.12198031278700468, "calibration/mean_confidence": 0.5995220320808029, "calibration/prompt_uniqueness": 0.8458984375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 647.8, "completions/max_terminated_length": 428.6, "completions/mean_length": 196.71005859375, "completions/mean_terminated_length": 196.57922973632813, "completions/min_length": 105.8, "completions/min_terminated_length": 105.8, "epoch": 0.864, "grad_norm": 0.0012777691008523107, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 912427399.0, "reward": 0.8713708758354187, "reward_std": 0.09100723564624787, "rewards/accuracy_reward": 0.587109375, "rewards/brier_reward": 0.8016261577606201, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0026119566056877373, "rewards/frontier_coverage_1": 0.07935620397329331, "rewards/frontier_coverage_10": 0.07880553454160691, "rewards/frontier_coverage_15": 0.07368464544415473, "rewards/frontier_coverage_20": 0.05397990569472313, "rewards/frontier_coverage_25": 0.04986085593700409, "rewards/frontier_coverage_5": 0.07933037877082824, "rewards/frontier_ece_reward": 0.0016980181448161603, "rewards/frontier_entropy_batch_reward": -0.23946044743061065, "signal/accuracy_reward/centered_abs_mean": 0.0991943359375, "signal/accuracy_reward/group_bin_occupancy": 0.16953125, "signal/accuracy_reward/group_std_mean": 0.12835633456707002, "signal/accuracy_reward/group_zero_std_frac": 0.64375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04959716796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04959716796875, "signal/advantage_abs_mean": 0.07214201688766479, "signal/advantage_pre_scale_abs_mean": 0.07214201688766479, "signal/advantage_pre_scale_std": 0.10875225216150283, "signal/advantage_std": 0.10875225216150283, "signal/brier_reward/centered_abs_mean": 0.10874636620283126, "signal/brier_reward/group_bin_occupancy": 0.8484375, "signal/brier_reward/group_std_mean": 0.13965383768081666, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013593295775353908, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013593295775353908, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023859881330281496, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73984375, "signal/frontier_aurc_reward/group_std_mean": 0.003907088562846184, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.270918434485793e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.270918434485793e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15109747648239136, "signal/frontier_coverage_1/group_bin_occupancy": 0.8625, "signal/frontier_coverage_1/group_std_mean": 0.19283765852451323, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00270464476197958, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00270464476197958, "signal/frontier_coverage_10/centered_abs_mean": 0.149289670586586, "signal/frontier_coverage_10/group_bin_occupancy": 0.86171875, "signal/frontier_coverage_10/group_std_mean": 0.19051893651485444, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002672284934669733, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002672284934669733, "signal/frontier_coverage_15/centered_abs_mean": 0.13000792711973191, "signal/frontier_coverage_15/group_bin_occupancy": 0.851953125, "signal/frontier_coverage_15/group_std_mean": 0.16591603457927703, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023271418176591396, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023271418176591396, "signal/frontier_coverage_20/centered_abs_mean": 0.07444732487201691, "signal/frontier_coverage_20/group_bin_occupancy": 0.896875, "signal/frontier_coverage_20/group_std_mean": 0.09547023475170135, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013326070504263044, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013326070504263044, "signal/frontier_coverage_25/centered_abs_mean": 0.05278872922062874, "signal/frontier_coverage_25/group_bin_occupancy": 0.923046875, "signal/frontier_coverage_25/group_std_mean": 0.06748096346855163, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009449182078242302, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009449182078242302, "signal/frontier_coverage_5/centered_abs_mean": 0.15094164311885833, "signal/frontier_coverage_5/group_bin_occupancy": 0.8625, "signal/frontier_coverage_5/group_std_mean": 0.19263845980167388, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002701855357736349, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002701855357736349, "signal/frontier_ece_reward/centered_abs_mean": 0.004699286818504333, "signal/frontier_ece_reward/group_bin_occupancy": 0.900390625, "signal/frontier_ece_reward/group_std_mean": 0.006102659367024898, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005874108523130417, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005874108523130417, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29436487555503843, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726171875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3652419447898865, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036795609444379804, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036795609444379804, "step": 270 }, { "calibration/aurc": 0.3689968283562816, "calibration/batch_distribution_entropy": 0.9816647884171952, "calibration/batch_entropy_100bins": 0.9690518445136315, "calibration/batch_entropy_10bins": 0.9816647884171952, "calibration/batch_entropy_50bins": 0.9786919422206465, "calibration/batch_uniqueness": 0.9536102294921875, "calibration/buffer_distribution_entropy": 0.9991432971050402, "calibration/buffer_entropy_100bins": 0.9990850763393919, "calibration/buffer_entropy_10bins": 0.9991432971050402, "calibration/buffer_entropy_50bins": 0.9991792049124377, "calibration/confidence_entropy": 0.47520039428209443, "calibration/coverage@0%": 0.005859375, "calibration/coverage@1%": 0.005859375, "calibration/coverage@10%": 0.0296875, "calibration/coverage@15%": 0.096484375, "calibration/coverage@20%": 0.146875, "calibration/coverage@25%": 0.214453125, "calibration/coverage@30%": 0.278515625, "calibration/coverage@5%": 0.02265625, "calibration/ece": 0.1265384011632356, "calibration/mean_confidence": 0.5028521730859784, "calibration/prompt_uniqueness": 0.831884765625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 452.8, "completions/max_terminated_length": 452.8, "completions/mean_length": 191.644921875, "completions/mean_terminated_length": 191.644921875, "completions/min_length": 97.6, "completions/min_terminated_length": 97.6, "epoch": 0.88, "grad_norm": 0.0009742515976540744, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 929536915.0, "reward": 0.8399426817893982, "reward_std": 0.09036057144403457, "rewards/accuracy_reward": 0.51142578125, "rewards/brier_reward": 0.7988796353340148, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.003130771638825536, "rewards/frontier_coverage_1": 0.13325000703334808, "rewards/frontier_coverage_10": 0.13197922110557556, "rewards/frontier_coverage_15": 0.11483763456344605, "rewards/frontier_coverage_20": 0.07162886634469032, "rewards/frontier_coverage_25": 0.05139811635017395, "rewards/frontier_coverage_5": 0.13309186547994614, "rewards/frontier_ece_reward": 0.0023863946786150335, "rewards/frontier_entropy_batch_reward": -0.21808099746704102, "signal/accuracy_reward/centered_abs_mean": 0.097894287109375, "signal/accuracy_reward/group_bin_occupancy": 0.169921875, "signal/accuracy_reward/group_std_mean": 0.1274777978658676, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0489471435546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0489471435546875, "signal/advantage_abs_mean": 0.0720147468149662, "signal/advantage_pre_scale_abs_mean": 0.0720147468149662, "signal/advantage_pre_scale_std": 0.11001690626144409, "signal/advantage_std": 0.11001690626144409, "signal/brier_reward/centered_abs_mean": 0.11212355941534043, "signal/brier_reward/group_bin_occupancy": 0.84609375, "signal/brier_reward/group_std_mean": 0.14494499266147615, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014015444926917553, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014015444926917553, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0028623202815651894, "signal/frontier_aurc_reward/group_bin_occupancy": 0.706640625, "signal/frontier_aurc_reward/group_std_mean": 0.004754068516194821, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.123553055454977e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.123553055454977e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15715896785259248, "signal/frontier_coverage_1/group_bin_occupancy": 0.8703125, "signal/frontier_coverage_1/group_std_mean": 0.20266908705234526, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002813145564869046, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002813145564869046, "signal/frontier_coverage_10/centered_abs_mean": 0.15548037588596345, "signal/frontier_coverage_10/group_bin_occupancy": 0.869921875, "signal/frontier_coverage_10/group_std_mean": 0.20054614543914795, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027830985840409995, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027830985840409995, "signal/frontier_coverage_15/centered_abs_mean": 0.13485134840011598, "signal/frontier_coverage_15/group_bin_occupancy": 0.865625, "signal/frontier_coverage_15/group_std_mean": 0.17428669035434724, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002413839101791382, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002413839101791382, "signal/frontier_coverage_20/centered_abs_mean": 0.0783051684498787, "signal/frontier_coverage_20/group_bin_occupancy": 0.9046875, "signal/frontier_coverage_20/group_std_mean": 0.10148594677448272, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014016624772921205, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014016624772921205, "signal/frontier_coverage_25/centered_abs_mean": 0.0531325563788414, "signal/frontier_coverage_25/group_bin_occupancy": 0.930859375, "signal/frontier_coverage_25/group_std_mean": 0.06878565400838851, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009510727250017226, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009510727250017226, "signal/frontier_coverage_5/centered_abs_mean": 0.15696605443954467, "signal/frontier_coverage_5/group_bin_occupancy": 0.87109375, "signal/frontier_coverage_5/group_std_mean": 0.20242418348789215, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002809692220762372, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002809692220762372, "signal/frontier_ece_reward/centered_abs_mean": 0.004732540622353554, "signal/frontier_ece_reward/group_bin_occupancy": 0.903125, "signal/frontier_ece_reward/group_std_mean": 0.006143409106880426, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005915675777941942, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005915675777941942, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28106330037117006, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.722265625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35157610177993776, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03513291254639626, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03513291254639626, "step": 275 }, { "calibration/aurc": 0.3373058154661964, "calibration/batch_distribution_entropy": 0.9826539498416601, "calibration/batch_entropy_100bins": 0.9682695002978061, "calibration/batch_entropy_10bins": 0.9826539498416601, "calibration/batch_entropy_50bins": 0.9762102980123724, "calibration/batch_uniqueness": 0.953095282009279, "calibration/buffer_distribution_entropy": 0.999126223691265, "calibration/buffer_entropy_100bins": 0.9990657505371372, "calibration/buffer_entropy_10bins": 0.999126223691265, "calibration/buffer_entropy_50bins": 0.9991405062324873, "calibration/confidence_entropy": 0.48298531010661866, "calibration/coverage@0%": 0.016410072162426615, "calibration/coverage@1%": 0.016410072162426615, "calibration/coverage@10%": 0.0601661876223092, "calibration/coverage@15%": 0.08399660591976517, "calibration/coverage@20%": 0.3101944716242661, "calibration/coverage@25%": 0.40160989481409004, "calibration/coverage@30%": 0.4906815680039139, "calibration/coverage@5%": 0.040628822162426616, "calibration/ece": 0.14296123700914443, "calibration/mean_confidence": 0.4870876866229802, "calibration/prompt_uniqueness": 0.8266549736602498, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 1082.6, "completions/max_terminated_length": 495.4, "completions/mean_length": 190.72861328125, "completions/mean_terminated_length": 190.20317687988282, "completions/min_length": 97.4, "completions/min_terminated_length": 97.4, "epoch": 0.896, "grad_norm": 0.0011140021961182356, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 946600824.0, "reward": 0.8575990200042725, "reward_std": 0.08079204559326172, "rewards/accuracy_reward": 0.54970703125, "rewards/brier_reward": 0.7908406734466553, "rewards/format_reward": 0.999609375, "rewards/frontier_aurc_reward": -0.0027651153039187195, "rewards/frontier_coverage_1": 0.10620979815721512, "rewards/frontier_coverage_10": 0.10566670447587967, "rewards/frontier_coverage_15": 0.09410437047481537, "rewards/frontier_coverage_20": 0.06202979385852814, "rewards/frontier_coverage_25": 0.04811366051435471, "rewards/frontier_coverage_5": 0.10607990473508835, "rewards/frontier_ece_reward": 0.0011842235224321484, "rewards/frontier_entropy_batch_reward": -0.20288202166557312, "signal/accuracy_reward/centered_abs_mean": 0.081243896484375, "signal/accuracy_reward/group_bin_occupancy": 0.16640625, "signal/accuracy_reward/group_std_mean": 0.11124018728733062, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0406219482421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0406219482421875, "signal/advantage_abs_mean": 0.061866439133882525, "signal/advantage_pre_scale_abs_mean": 0.061866439133882525, "signal/advantage_pre_scale_std": 0.0962506964802742, "signal/advantage_std": 0.0962506964802742, "signal/brier_reward/centered_abs_mean": 0.11059802174568176, "signal/brier_reward/group_bin_occupancy": 0.858984375, "signal/brier_reward/group_std_mean": 0.14146918654441834, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01382475271821022, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.01382475271821022, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_bin_occupancy": 0.1265625, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_aurc_reward/centered_abs_mean": 0.002222577598877251, "signal/frontier_aurc_reward/group_bin_occupancy": 0.73046875, "signal/frontier_aurc_reward/group_std_mean": 0.003623427450656891, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.978413733420894e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.978413733420894e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15609081983566284, "signal/frontier_coverage_1/group_bin_occupancy": 0.86484375, "signal/frontier_coverage_1/group_std_mean": 0.20036340057849883, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027940256986767054, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027940256986767054, "signal/frontier_coverage_10/centered_abs_mean": 0.15488055050373079, "signal/frontier_coverage_10/group_bin_occupancy": 0.864453125, "signal/frontier_coverage_10/group_std_mean": 0.19879828989505768, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027723620180040596, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027723620180040596, "signal/frontier_coverage_15/centered_abs_mean": 0.13422557562589646, "signal/frontier_coverage_15/group_bin_occupancy": 0.857421875, "signal/frontier_coverage_15/group_std_mean": 0.17230915725231172, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024026377592235803, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024026377592235803, "signal/frontier_coverage_20/centered_abs_mean": 0.07715532034635544, "signal/frontier_coverage_20/group_bin_occupancy": 0.881640625, "signal/frontier_coverage_20/group_std_mean": 0.09921250641345977, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013810801785439253, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013810801785439253, "signal/frontier_coverage_25/centered_abs_mean": 0.05220200940966606, "signal/frontier_coverage_25/group_bin_occupancy": 0.918359375, "signal/frontier_coverage_25/group_std_mean": 0.06679405272006989, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000934415915980935, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000934415915980935, "signal/frontier_coverage_5/centered_abs_mean": 0.1558253914117813, "signal/frontier_coverage_5/group_bin_occupancy": 0.8640625, "signal/frontier_coverage_5/group_std_mean": 0.20002435743808747, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027892745565623045, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027892745565623045, "signal/frontier_ece_reward/centered_abs_mean": 0.0045457611791789535, "signal/frontier_ece_reward/group_bin_occupancy": 0.897265625, "signal/frontier_ece_reward/group_std_mean": 0.005920033343136311, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005682201473973692, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005682201473973692, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2616787314414978, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.728125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3359409987926483, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03270984143018722, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03270984143018722, "step": 280 }, { "calibration/aurc": 0.3549690512751742, "calibration/batch_distribution_entropy": 0.977302492713015, "calibration/batch_entropy_100bins": 0.9687005838156748, "calibration/batch_entropy_10bins": 0.977302492713015, "calibration/batch_entropy_50bins": 0.9769308698120451, "calibration/batch_uniqueness": 0.9527252197265625, "calibration/buffer_distribution_entropy": 0.9990242862387232, "calibration/buffer_entropy_100bins": 0.9989872340708235, "calibration/buffer_entropy_10bins": 0.9990242862387232, "calibration/buffer_entropy_50bins": 0.9990547510383561, "calibration/confidence_entropy": 0.49627782732928577, "calibration/coverage@0%": 0.01796875, "calibration/coverage@1%": 0.01796875, "calibration/coverage@10%": 0.061328125, "calibration/coverage@15%": 0.096484375, "calibration/coverage@20%": 0.25859375, "calibration/coverage@25%": 0.393359375, "calibration/coverage@30%": 0.455859375, "calibration/coverage@5%": 0.0484375, "calibration/ece": 0.15157571405774412, "calibration/mean_confidence": 0.49334667608446414, "calibration/prompt_uniqueness": 0.845263671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 865.6, "completions/max_terminated_length": 455.2, "completions/mean_length": 191.36181640625, "completions/mean_terminated_length": 191.0985565185547, "completions/min_length": 86.0, "completions/min_terminated_length": 86.0, "epoch": 0.912, "grad_norm": 0.0010114161996170878, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 963611665.0, "reward": 0.8577099800109863, "reward_std": 0.08399459272623062, "rewards/accuracy_reward": 0.54443359375, "rewards/brier_reward": 0.8012910604476928, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0027018039952963592, "rewards/frontier_coverage_1": 0.1103449311107397, "rewards/frontier_coverage_10": 0.10959461368620396, "rewards/frontier_coverage_15": 0.09755977056920528, "rewards/frontier_coverage_20": 0.06432019025087357, "rewards/frontier_coverage_25": 0.05184435471892357, "rewards/frontier_coverage_5": 0.11015897234901786, "rewards/frontier_ece_reward": 0.001691946922801435, "rewards/frontier_entropy_batch_reward": -0.19574475586414336, "signal/accuracy_reward/centered_abs_mean": 0.080206298828125, "signal/accuracy_reward/group_bin_occupancy": 0.167578125, "signal/accuracy_reward/group_std_mean": 0.11207558661699295, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0401031494140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0401031494140625, "signal/advantage_abs_mean": 0.06401625275611877, "signal/advantage_pre_scale_abs_mean": 0.06401625275611877, "signal/advantage_pre_scale_std": 0.10008785128593445, "signal/advantage_std": 0.10008785128593445, "signal/brier_reward/centered_abs_mean": 0.11323688179254532, "signal/brier_reward/group_bin_occupancy": 0.8328125, "signal/brier_reward/group_std_mean": 0.14842240512371063, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014154610224068165, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.014154610224068165, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023412939393892885, "signal/frontier_aurc_reward/group_bin_occupancy": 0.730859375, "signal/frontier_aurc_reward/group_std_mean": 0.003946681786328554, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.190915824437979e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.190915824437979e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15513492822647096, "signal/frontier_coverage_1/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_1/group_std_mean": 0.20151489973068237, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027769151609390976, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027769151609390976, "signal/frontier_coverage_10/centered_abs_mean": 0.15395722687244415, "signal/frontier_coverage_10/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_10/group_std_mean": 0.19999560713768005, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027558341156691314, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027558341156691314, "signal/frontier_coverage_15/centered_abs_mean": 0.13296782821416855, "signal/frontier_coverage_15/group_bin_occupancy": 0.853515625, "signal/frontier_coverage_15/group_std_mean": 0.17283936440944672, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023801239673048257, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023801239673048257, "signal/frontier_coverage_20/centered_abs_mean": 0.0778110533952713, "signal/frontier_coverage_20/group_bin_occupancy": 0.889453125, "signal/frontier_coverage_20/group_std_mean": 0.10108065158128739, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013928177999332548, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013928177999332548, "signal/frontier_coverage_25/centered_abs_mean": 0.0549514427781105, "signal/frontier_coverage_25/group_bin_occupancy": 0.9234375, "signal/frontier_coverage_25/group_std_mean": 0.07073460221290588, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000983630819246173, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000983630819246173, "signal/frontier_coverage_5/centered_abs_mean": 0.15491481125354767, "signal/frontier_coverage_5/group_bin_occupancy": 0.86328125, "signal/frontier_coverage_5/group_std_mean": 0.20123314261436462, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027729750145226717, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027729750145226717, "signal/frontier_ece_reward/centered_abs_mean": 0.004723855573683977, "signal/frontier_ece_reward/group_bin_occupancy": 0.8921875, "signal/frontier_ece_reward/group_std_mean": 0.006184379477053881, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005904819467104971, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005904819467104971, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2582594394683838, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33268279433250425, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032282429933547976, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032282429933547976, "step": 285 }, { "calibration/aurc": 0.41280512387379353, "calibration/batch_distribution_entropy": 0.983191233962286, "calibration/batch_entropy_100bins": 0.9707893275502993, "calibration/batch_entropy_10bins": 0.983191233962286, "calibration/batch_entropy_50bins": 0.9796747150760041, "calibration/batch_uniqueness": 0.9539581298828125, "calibration/buffer_distribution_entropy": 0.9990114243714435, "calibration/buffer_entropy_100bins": 0.9990231310977965, "calibration/buffer_entropy_10bins": 0.9990114243714435, "calibration/buffer_entropy_50bins": 0.9990782041438109, "calibration/confidence_entropy": 0.5056221875295627, "calibration/coverage@0%": 0.005859375, "calibration/coverage@1%": 0.005859375, "calibration/coverage@10%": 0.0171875, "calibration/coverage@15%": 0.019140625, "calibration/coverage@20%": 0.033203125, "calibration/coverage@25%": 0.10234375, "calibration/coverage@30%": 0.267578125, "calibration/coverage@5%": 0.01171875, "calibration/ece": 0.13720565678424684, "calibration/mean_confidence": 0.5138671169361444, "calibration/prompt_uniqueness": 0.854052734375, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 698.8, "completions/max_terminated_length": 482.6, "completions/mean_length": 188.668359375, "completions/mean_terminated_length": 188.5367401123047, "completions/min_length": 93.4, "completions/min_terminated_length": 93.4, "epoch": 0.928, "grad_norm": 0.0009088640799745917, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 980570445.0, "reward": 0.8483018755912781, "reward_std": 0.08071554303169251, "rewards/accuracy_reward": 0.53251953125, "rewards/brier_reward": 0.7901157855987548, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.003404234582558274, "rewards/frontier_coverage_1": 0.11180114150047302, "rewards/frontier_coverage_10": 0.11092503815889358, "rewards/frontier_coverage_15": 0.09873643815517426, "rewards/frontier_coverage_20": 0.0643385447561741, "rewards/frontier_coverage_25": 0.052167801558971404, "rewards/frontier_coverage_5": 0.1116182416677475, "rewards/frontier_ece_reward": 0.0019177033798769116, "rewards/frontier_entropy_batch_reward": -0.21312889754772185, "signal/accuracy_reward/centered_abs_mean": 0.072637939453125, "signal/accuracy_reward/group_bin_occupancy": 0.163671875, "signal/accuracy_reward/group_std_mean": 0.10080017894506454, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363189697265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0363189697265625, "signal/advantage_abs_mean": 0.06181541979312897, "signal/advantage_pre_scale_abs_mean": 0.06181541979312897, "signal/advantage_pre_scale_std": 0.09706850945949555, "signal/advantage_std": 0.09706850945949555, "signal/brier_reward/centered_abs_mean": 0.111118184030056, "signal/brier_reward/group_bin_occupancy": 0.83671875, "signal/brier_reward/group_std_mean": 0.14327452182769776, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013889773003757, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013889773003757, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0029117835219949484, "signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375, "signal/frontier_aurc_reward/group_std_mean": 0.004748767055571079, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.2120923646725716e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.2120923646725716e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14507719576358796, "signal/frontier_coverage_1/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_1/group_std_mean": 0.1872227430343628, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002596881752833724, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002596881752833724, "signal/frontier_coverage_10/centered_abs_mean": 0.1439109742641449, "signal/frontier_coverage_10/group_bin_occupancy": 0.8546875, "signal/frontier_coverage_10/group_std_mean": 0.18574815690517427, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002576006343588233, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002576006343588233, "signal/frontier_coverage_15/centered_abs_mean": 0.1262580692768097, "signal/frontier_coverage_15/group_bin_occupancy": 0.846484375, "signal/frontier_coverage_15/group_std_mean": 0.16331798434257508, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022600193507969378, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022600193507969378, "signal/frontier_coverage_20/centered_abs_mean": 0.0749655857682228, "signal/frontier_coverage_20/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_20/group_std_mean": 0.09689257442951202, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013418839545920492, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013418839545920492, "signal/frontier_coverage_25/centered_abs_mean": 0.05532756522297859, "signal/frontier_coverage_25/group_bin_occupancy": 0.93046875, "signal/frontier_coverage_25/group_std_mean": 0.07019431442022324, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009903633617796004, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009903633617796004, "signal/frontier_coverage_5/centered_abs_mean": 0.14484555274248123, "signal/frontier_coverage_5/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_5/group_std_mean": 0.18693141639232635, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002592735271900892, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002592735271900892, "signal/frontier_ece_reward/centered_abs_mean": 0.004770417790859937, "signal/frontier_ece_reward/group_bin_occupancy": 0.89296875, "signal/frontier_ece_reward/group_std_mean": 0.0062720650807023045, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005963022238574922, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005963022238574922, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27077251076698305, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34694740176200867, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03384656384587288, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03384656384587288, "step": 290 }, { "calibration/aurc": 0.24858498275267107, "calibration/batch_distribution_entropy": 0.9861492452770714, "calibration/batch_entropy_100bins": 0.9737597827623423, "calibration/batch_entropy_10bins": 0.9861492452770714, "calibration/batch_entropy_50bins": 0.9812966268493726, "calibration/batch_uniqueness": 0.954254150390625, "calibration/buffer_distribution_entropy": 0.9988957247957387, "calibration/buffer_entropy_100bins": 0.9989956482917522, "calibration/buffer_entropy_10bins": 0.9988957247957387, "calibration/buffer_entropy_50bins": 0.9990108307728066, "calibration/confidence_entropy": 0.4974394635763413, "calibration/coverage@0%": 0.025, "calibration/coverage@1%": 0.025, "calibration/coverage@10%": 0.169921875, "calibration/coverage@15%": 0.289453125, "calibration/coverage@20%": 0.42890625, "calibration/coverage@25%": 0.5796875, "calibration/coverage@30%": 0.6609375, "calibration/coverage@5%": 0.09921875, "calibration/ece": 0.10266510009719523, "calibration/mean_confidence": 0.472734394472595, "calibration/prompt_uniqueness": 0.839453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 446.6, "completions/max_terminated_length": 446.6, "completions/mean_length": 187.97138671875, "completions/mean_terminated_length": 187.97138671875, "completions/min_length": 97.2, "completions/min_terminated_length": 97.2, "epoch": 0.944, "grad_norm": 0.0011019782396033406, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 997470696.0, "reward": 0.8485186576843262, "reward_std": 0.09234340786933899, "rewards/accuracy_reward": 0.53505859375, "rewards/brier_reward": 0.7864872336387634, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0030688193626701834, "rewards/frontier_coverage_1": 0.10997713655233383, "rewards/frontier_coverage_10": 0.10910578817129135, "rewards/frontier_coverage_15": 0.09824755191802978, "rewards/frontier_coverage_20": 0.06621812656521797, "rewards/frontier_coverage_25": 0.04666025787591934, "rewards/frontier_coverage_5": 0.10978993475437164, "rewards/frontier_ece_reward": 0.0015931544359773398, "rewards/frontier_entropy_batch_reward": -0.2170539140701294, "signal/accuracy_reward/centered_abs_mean": 0.112640380859375, "signal/accuracy_reward/group_bin_occupancy": 0.176171875, "signal/accuracy_reward/group_std_mean": 0.14598776698112487, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0563201904296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0563201904296875, "signal/advantage_abs_mean": 0.07284359484910966, "signal/advantage_pre_scale_abs_mean": 0.07284359484910966, "signal/advantage_pre_scale_std": 0.11034233421087265, "signal/advantage_std": 0.11034233421087265, "signal/brier_reward/centered_abs_mean": 0.11176075041294098, "signal/brier_reward/group_bin_occupancy": 0.832421875, "signal/brier_reward/group_std_mean": 0.14492084681987763, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013970093801617622, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013970093801617622, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0024296872783452273, "signal/frontier_aurc_reward/group_bin_occupancy": 0.716796875, "signal/frontier_aurc_reward/group_std_mean": 0.004007898364216089, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.349140144768171e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.349140144768171e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.1726018726825714, "signal/frontier_coverage_1/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_1/group_std_mean": 0.22027516961097718, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003089573513716459, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003089573513716459, "signal/frontier_coverage_10/centered_abs_mean": 0.17125667929649352, "signal/frontier_coverage_10/group_bin_occupancy": 0.8515625, "signal/frontier_coverage_10/group_std_mean": 0.21858170330524446, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030654944013804196, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030654944013804196, "signal/frontier_coverage_15/centered_abs_mean": 0.1538640648126602, "signal/frontier_coverage_15/group_bin_occupancy": 0.846484375, "signal/frontier_coverage_15/group_std_mean": 0.19664142429828643, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027541667222976685, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027541667222976685, "signal/frontier_coverage_20/centered_abs_mean": 0.08876172602176666, "signal/frontier_coverage_20/group_bin_occupancy": 0.8796875, "signal/frontier_coverage_20/group_std_mean": 0.11390969753265381, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015888348687440157, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015888348687440157, "signal/frontier_coverage_25/centered_abs_mean": 0.054720057547092436, "signal/frontier_coverage_25/group_bin_occupancy": 0.919140625, "signal/frontier_coverage_25/group_std_mean": 0.07047712504863739, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009794890065677464, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009794890065677464, "signal/frontier_coverage_5/centered_abs_mean": 0.1723033905029297, "signal/frontier_coverage_5/group_bin_occupancy": 0.85234375, "signal/frontier_coverage_5/group_std_mean": 0.21990018784999849, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030842306092381476, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030842306092381476, "signal/frontier_ece_reward/centered_abs_mean": 0.005012043006718159, "signal/frontier_ece_reward/group_bin_occupancy": 0.90234375, "signal/frontier_ece_reward/group_std_mean": 0.006545277405530214, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006265053758397699, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006265053758397699, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27133584320545195, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726953125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34131971597671507, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033916980400681494, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033916980400681494, "step": 295 }, { "calibration/aurc": 0.3256529793283661, "calibration/batch_distribution_entropy": 0.984952501452715, "calibration/batch_entropy_100bins": 0.9718621359045295, "calibration/batch_entropy_10bins": 0.984952501452715, "calibration/batch_entropy_50bins": 0.9805238677858856, "calibration/batch_uniqueness": 0.954693603515625, "calibration/buffer_distribution_entropy": 0.9989409552203344, "calibration/buffer_entropy_100bins": 0.9990398793740454, "calibration/buffer_entropy_10bins": 0.9989409552203344, "calibration/buffer_entropy_50bins": 0.9990565379611495, "calibration/confidence_entropy": 0.4897216704300299, "calibration/coverage@0%": 0.01171875, "calibration/coverage@1%": 0.01171875, "calibration/coverage@10%": 0.104296875, "calibration/coverage@15%": 0.2359375, "calibration/coverage@20%": 0.308984375, "calibration/coverage@25%": 0.3640625, "calibration/coverage@30%": 0.5078125, "calibration/coverage@5%": 0.012109375, "calibration/ece": 0.13947520019378662, "calibration/mean_confidence": 0.5198152612431157, "calibration/prompt_uniqueness": 0.84501953125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 652.8, "completions/max_terminated_length": 458.2, "completions/mean_length": 188.3568359375, "completions/mean_terminated_length": 188.22555847167968, "completions/min_length": 101.8, "completions/min_terminated_length": 101.8, "epoch": 0.96, "grad_norm": 0.0008233313565142453, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 1014339790.0, "reward": 0.8462372064590454, "reward_std": 0.07700852155685425, "rewards/accuracy_reward": 0.5169921875, "rewards/brier_reward": 0.8101608753204346, "rewards/format_reward": 0.9998046875, "rewards/frontier_aurc_reward": -0.0029094903729856014, "rewards/frontier_coverage_1": 0.13561428487300872, "rewards/frontier_coverage_10": 0.13452683985233307, "rewards/frontier_coverage_15": 0.1212164431810379, "rewards/frontier_coverage_20": 0.07751094549894333, "rewards/frontier_coverage_25": 0.05298488959670067, "rewards/frontier_coverage_5": 0.135471972823143, "rewards/frontier_ece_reward": 0.0022863436490297316, "rewards/frontier_entropy_batch_reward": -0.20344921350479125, "signal/accuracy_reward/centered_abs_mean": 0.0708251953125, "signal/accuracy_reward/group_bin_occupancy": 0.162890625, "signal/accuracy_reward/group_std_mean": 0.0995680645108223, "signal/accuracy_reward/group_zero_std_frac": 0.696875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03541259765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03541259765625, "signal/advantage_abs_mean": 0.05835134610533714, "signal/advantage_pre_scale_abs_mean": 0.05835134610533714, "signal/advantage_pre_scale_std": 0.09123541563749313, "signal/advantage_std": 0.09123541563749313, "signal/brier_reward/centered_abs_mean": 0.1000775396823883, "signal/brier_reward/group_bin_occupancy": 0.8765625, "signal/brier_reward/group_std_mean": 0.12929840236902237, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012509692460298538, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012509692460298538, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_bin_occupancy": 0.12578125, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_aurc_reward/centered_abs_mean": 0.0023374527459964155, "signal/frontier_aurc_reward/group_bin_occupancy": 0.745703125, "signal/frontier_aurc_reward/group_std_mean": 0.003886171476915479, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.184040299151093e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.184040299151093e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14079618453979492, "signal/frontier_coverage_1/group_bin_occupancy": 0.882421875, "signal/frontier_coverage_1/group_std_mean": 0.18256475627422333, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002520251739770174, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002520251739770174, "signal/frontier_coverage_10/centered_abs_mean": 0.13965638279914855, "signal/frontier_coverage_10/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_10/group_std_mean": 0.181106236577034, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024998491164296864, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024998491164296864, "signal/frontier_coverage_15/centered_abs_mean": 0.12565270364284514, "signal/frontier_coverage_15/group_bin_occupancy": 0.87421875, "signal/frontier_coverage_15/group_std_mean": 0.16323770582675934, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022491833195090296, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022491833195090296, "signal/frontier_coverage_20/centered_abs_mean": 0.0731646478176117, "signal/frontier_coverage_20/group_bin_occupancy": 0.895703125, "signal/frontier_coverage_20/group_std_mean": 0.0953644946217537, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013096471317112445, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013096471317112445, "signal/frontier_coverage_25/centered_abs_mean": 0.05031884089112282, "signal/frontier_coverage_25/group_bin_occupancy": 0.93359375, "signal/frontier_coverage_25/group_std_mean": 0.06426827237010002, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009007072076201439, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009007072076201439, "signal/frontier_coverage_5/centered_abs_mean": 0.14063106179237367, "signal/frontier_coverage_5/group_bin_occupancy": 0.8828125, "signal/frontier_coverage_5/group_std_mean": 0.18235519230365754, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025172959081828593, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025172959081828593, "signal/frontier_ece_reward/centered_abs_mean": 0.0048952271230518814, "signal/frontier_ece_reward/group_bin_occupancy": 0.89609375, "signal/frontier_ece_reward/group_std_mean": 0.006365635897964239, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006119033903814852, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006119033903814852, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26704747676849366, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73671875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33781918287277224, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03338093459606171, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03338093459606171, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.4575152377276934, "eval_calibration/batch_distribution_entropy": 0.9083536036370418, "eval_calibration/batch_entropy_100bins": 0.7084673039767129, "eval_calibration/batch_entropy_10bins": 0.9083536036370418, "eval_calibration/batch_entropy_50bins": 0.7941923347176845, "eval_calibration/batch_uniqueness": 0.8974609375, "eval_calibration/buffer_distribution_entropy": 0.9987965314883902, "eval_calibration/buffer_entropy_100bins": 0.9989874140319082, "eval_calibration/buffer_entropy_10bins": 0.9987965314883902, "eval_calibration/buffer_entropy_50bins": 0.999000163227866, "eval_calibration/confidence_entropy": 0.4877032802303043, "eval_calibration/coverage@0%": 0.0859375, "eval_calibration/coverage@1%": 0.0859375, "eval_calibration/coverage@10%": 0.0859375, "eval_calibration/coverage@15%": 0.09375, "eval_calibration/coverage@20%": 0.1015625, "eval_calibration/coverage@25%": 0.1484375, "eval_calibration/coverage@30%": 0.15625, "eval_calibration/coverage@5%": 0.0859375, "eval_calibration/ece": 0.20039907035790783, "eval_calibration/mean_confidence": 0.4575770178494413, "eval_calibration/prompt_uniqueness": 0.8974609375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 295.25, "eval_completions/max_terminated_length": 295.25, "eval_completions/mean_length": 189.70380020141602, "eval_completions/mean_terminated_length": 189.70380020141602, "eval_completions/min_length": 116.75, "eval_completions/min_terminated_length": 116.75, "eval_loss": 0.0, "eval_num_tokens": 1014339790.0, "eval_reward": 0.7071669399738312, "eval_reward_std": 0.226898942142725, "eval_rewards/accuracy_reward": 0.427734375, "eval_rewards/brier_reward": 0.807328924536705, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_aurc_reward": -0.003334582201205194, "eval_rewards/frontier_coverage_1": 0.20210690423846245, "eval_rewards/frontier_coverage_10": 0.2005590945482254, "eval_rewards/frontier_coverage_15": 0.18310636281967163, "eval_rewards/frontier_coverage_20": 0.10963826067745686, "eval_rewards/frontier_coverage_25": 0.05915482249110937, "eval_rewards/frontier_coverage_5": 0.20197707042098045, "eval_rewards/frontier_ece_reward": 0.002569766016677022, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 17.0706, "eval_samples_per_second": 29.29, "eval_signal/accuracy_reward/centered_abs_mean": 0.4730224609375, "eval_signal/accuracy_reward/group_bin_occupancy": 0.25, "eval_signal/accuracy_reward/group_std_mean": 0.4935857355594635, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23651123046875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23651123046875, "eval_signal/advantage_abs_mean": 0.21183785423636436, "eval_signal/advantage_pre_scale_abs_mean": 0.21183785423636436, "eval_signal/advantage_pre_scale_std": 0.22447463124990463, "eval_signal/advantage_std": 0.22447463124990463, "eval_signal/brier_reward/centered_abs_mean": 0.1758808195590973, "eval_signal/brier_reward/group_bin_occupancy": 0.9296875, "eval_signal/brier_reward/group_std_mean": 0.22358601912856102, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02198510244488716, "eval_signal/brier_reward/weight": 0.125, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02198510244488716, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_bin_occupancy": 0.125, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004215276916511357, "eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.640625, "eval_signal/frontier_aurc_reward/group_std_mean": 0.008374640950933099, "eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.545345397375058e-05, "eval_signal/frontier_aurc_reward/weight": 0.017899999395012856, "eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.545345397375058e-05, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3614576756954193, "eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_1/group_std_mean": 0.430373378098011, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00647009233944118, "eval_signal/frontier_coverage_1/weight": 0.017899999395012856, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00647009233944118, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.35852116346359253, "eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_10/group_std_mean": 0.42694830149412155, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0064175286097452044, "eval_signal/frontier_coverage_10/weight": 0.017899999395012856, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0064175286097452044, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.32429099828004837, "eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9453125, "eval_signal/frontier_coverage_15/group_std_mean": 0.387409083545208, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005804808693937957, "eval_signal/frontier_coverage_15/weight": 0.017899999395012856, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005804808693937957, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.1720643900334835, "eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8984375, "eval_signal/frontier_coverage_20/group_std_mean": 0.21237896382808685, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030799524392932653, "eval_signal/frontier_coverage_20/weight": 0.017899999395012856, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030799524392932653, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.08441895246505737, "eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125, "eval_signal/frontier_coverage_25/group_std_mean": 0.1048442255705595, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015110991662368178, "eval_signal/frontier_coverage_25/weight": 0.017899999395012856, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015110991662368178, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3611843213438988, "eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125, "eval_signal/frontier_coverage_5/group_std_mean": 0.4300566017627716, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006465199403464794, "eval_signal/frontier_coverage_5/weight": 0.017899999395012856, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006465199403464794, "eval_signal/frontier_ece_reward/centered_abs_mean": 0.006295109633356333, "eval_signal/frontier_ece_reward/group_bin_occupancy": 0.96875, "eval_signal/frontier_ece_reward/group_std_mean": 0.008172678295522928, "eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0, "eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007868887041695416, "eval_signal/frontier_ece_reward/weight": 0.125, "eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007868887041695416, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.125, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.234, "step": 300 }, { "calibration/aurc": 0.26956602079129766, "calibration/batch_distribution_entropy": 0.975847157951916, "calibration/batch_entropy_100bins": 0.9649908946480957, "calibration/batch_entropy_10bins": 0.975847157951916, "calibration/batch_entropy_50bins": 0.9739094070267618, "calibration/batch_uniqueness": 0.95203857421875, "calibration/buffer_distribution_entropy": 0.9987995026989941, "calibration/buffer_entropy_100bins": 0.9989820996482228, "calibration/buffer_entropy_10bins": 0.9987995026989941, "calibration/buffer_entropy_50bins": 0.9989938509658627, "calibration/confidence_entropy": 0.5116255959622197, "calibration/coverage@0%": 0.014453125, "calibration/coverage@1%": 0.014453125, "calibration/coverage@10%": 0.26015625, "calibration/coverage@15%": 0.36953125, "calibration/coverage@20%": 0.46171875, "calibration/coverage@25%": 0.51875, "calibration/coverage@30%": 0.59140625, "calibration/coverage@5%": 0.1109375, "calibration/ece": 0.13497489066439078, "calibration/mean_confidence": 0.49586762510521404, "calibration/prompt_uniqueness": 0.840478515625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 517.2, "completions/max_terminated_length": 517.2, "completions/mean_length": 192.8546875, "completions/mean_terminated_length": 192.8546875, "completions/min_length": 104.6, "completions/min_terminated_length": 104.6, "epoch": 0.976, "grad_norm": 0.00106345908716321, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1031175742.0, "reward": 0.8589818477630615, "reward_std": 0.08459014743566513, "rewards/accuracy_reward": 0.554296875, "rewards/brier_reward": 0.7969029545783997, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.0025948323775082825, "rewards/frontier_coverage_1": 0.10237730550579727, "rewards/frontier_coverage_10": 0.10188237186521291, "rewards/frontier_coverage_15": 0.09465207355096936, "rewards/frontier_coverage_20": 0.066153160110116, "rewards/frontier_coverage_25": 0.04752057008445263, "rewards/frontier_coverage_5": 0.102328123152256, "rewards/frontier_ece_reward": 0.001362017064820975, "rewards/frontier_entropy_batch_reward": -0.2165709674358368, "signal/accuracy_reward/centered_abs_mean": 0.0876708984375, "signal/accuracy_reward/group_bin_occupancy": 0.1703125, "signal/accuracy_reward/group_std_mean": 0.12027212083339692, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04383544921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04383544921875, "signal/advantage_abs_mean": 0.06527443826198578, "signal/advantage_pre_scale_abs_mean": 0.06527443826198578, "signal/advantage_pre_scale_std": 0.09959482550621032, "signal/advantage_std": 0.09959482550621032, "signal/brier_reward/centered_abs_mean": 0.10134001821279526, "signal/brier_reward/group_bin_occupancy": 0.856640625, "signal/brier_reward/group_std_mean": 0.13163287788629532, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012667502276599407, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012667502276599407, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.002029223274439573, "signal/frontier_aurc_reward/group_bin_occupancy": 0.730078125, "signal/frontier_aurc_reward/group_std_mean": 0.0033127402421087027, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.632309453678317e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.632309453678317e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15118311196565629, "signal/frontier_coverage_1/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_1/group_std_mean": 0.1950996220111847, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002706177672371268, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002706177672371268, "signal/frontier_coverage_10/centered_abs_mean": 0.15004239976406097, "signal/frontier_coverage_10/group_bin_occupancy": 0.865234375, "signal/frontier_coverage_10/group_std_mean": 0.1936278909444809, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002685758890584111, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002685758890584111, "signal/frontier_coverage_15/centered_abs_mean": 0.13605864495038986, "signal/frontier_coverage_15/group_bin_occupancy": 0.863671875, "signal/frontier_coverage_15/group_std_mean": 0.1756308764219284, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002435449743643403, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002435449743643403, "signal/frontier_coverage_20/centered_abs_mean": 0.0769604966044426, "signal/frontier_coverage_20/group_bin_occupancy": 0.890234375, "signal/frontier_coverage_20/group_std_mean": 0.09997625052928924, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013775928178802132, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013775928178802132, "signal/frontier_coverage_25/centered_abs_mean": 0.04939193576574326, "signal/frontier_coverage_25/group_bin_occupancy": 0.9171875, "signal/frontier_coverage_25/group_std_mean": 0.06393922716379166, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008841155911795795, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008841155911795795, "signal/frontier_coverage_5/centered_abs_mean": 0.1510834127664566, "signal/frontier_coverage_5/group_bin_occupancy": 0.86640625, "signal/frontier_coverage_5/group_std_mean": 0.19497024416923522, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002704393118619919, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002704393118619919, "signal/frontier_ece_reward/centered_abs_mean": 0.004813673906028271, "signal/frontier_ece_reward/group_bin_occupancy": 0.8953125, "signal/frontier_ece_reward/group_std_mean": 0.006306731514632702, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006017092382535338, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006017092382535338, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2821305632591248, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3564057588577271, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0352663204073906, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0352663204073906, "step": 305 }, { "calibration/aurc": 0.34615962203217165, "calibration/batch_distribution_entropy": 0.9822478080256127, "calibration/batch_entropy_100bins": 0.9710197463283367, "calibration/batch_entropy_10bins": 0.9822478080256127, "calibration/batch_entropy_50bins": 0.9798762014394601, "calibration/batch_uniqueness": 0.9536600873594491, "calibration/buffer_distribution_entropy": 0.998897547628759, "calibration/buffer_entropy_100bins": 0.999052160716665, "calibration/buffer_entropy_10bins": 0.998897547628759, "calibration/buffer_entropy_50bins": 0.9990733510872982, "calibration/confidence_entropy": 0.4787588483252154, "calibration/coverage@0%": 0.01914826932485323, "calibration/coverage@1%": 0.01914826932485323, "calibration/coverage@10%": 0.06448752446183953, "calibration/coverage@15%": 0.11417563600782779, "calibration/coverage@20%": 0.1626505931996086, "calibration/coverage@25%": 0.4001697040117417, "calibration/coverage@30%": 0.483008959148728, "calibration/coverage@5%": 0.03282014432485323, "calibration/ece": 0.11840179079530369, "calibration/mean_confidence": 0.4507503788278635, "calibration/prompt_uniqueness": 0.8376709746520552, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 606.0, "completions/max_terminated_length": 390.0, "completions/mean_length": 189.25205078125, "completions/mean_terminated_length": 189.1205261230469, "completions/min_length": 102.8, "completions/min_terminated_length": 102.8, "epoch": 0.992, "grad_norm": 0.0010464171646162868, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1048242163.0, "reward": 0.8496513366699219, "reward_std": 0.08171502947807312, "rewards/accuracy_reward": 0.53125, "rewards/brier_reward": 0.8029752850532532, "rewards/format_reward": 0.99990234375, "rewards/frontier_aurc_reward": -0.002545328298583627, "rewards/frontier_coverage_1": 0.13065127432346343, "rewards/frontier_coverage_10": 0.12966825366020202, "rewards/frontier_coverage_15": 0.11746386885643005, "rewards/frontier_coverage_20": 0.07562436014413834, "rewards/frontier_coverage_25": 0.057191865891218184, "rewards/frontier_coverage_5": 0.13061045855283737, "rewards/frontier_ece_reward": 0.0018452441552653908, "rewards/frontier_entropy_batch_reward": -0.2236760824918747, "signal/accuracy_reward/centered_abs_mean": 0.083056640625, "signal/accuracy_reward/group_bin_occupancy": 0.163671875, "signal/accuracy_reward/group_std_mean": 0.10889570638537407, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0415283203125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0415283203125, "signal/advantage_abs_mean": 0.06335543915629387, "signal/advantage_pre_scale_abs_mean": 0.06335543915629387, "signal/advantage_pre_scale_std": 0.0979221597313881, "signal/advantage_std": 0.0979221597313881, "signal/brier_reward/centered_abs_mean": 0.10264453142881394, "signal/brier_reward/group_bin_occupancy": 0.843359375, "signal/brier_reward/group_std_mean": 0.131204953789711, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012830566428601742, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.012830566428601742, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_bin_occupancy": 0.125390625, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_aurc_reward/centered_abs_mean": 0.0020562576595693828, "signal/frontier_aurc_reward/group_bin_occupancy": 0.74609375, "signal/frontier_aurc_reward/group_std_mean": 0.0034046342596411707, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.680701047414914e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.680701047414914e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.15361351668834686, "signal/frontier_coverage_1/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_1/group_std_mean": 0.1951121598482132, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00274968184530735, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00274968184530735, "signal/frontier_coverage_10/centered_abs_mean": 0.1525499314069748, "signal/frontier_coverage_10/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_10/group_std_mean": 0.1937567949295044, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027306437492370605, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027306437492370605, "signal/frontier_coverage_15/centered_abs_mean": 0.13804518431425095, "signal/frontier_coverage_15/group_bin_occupancy": 0.862109375, "signal/frontier_coverage_15/group_std_mean": 0.1754040390253067, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024710086872801185, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024710086872801185, "signal/frontier_coverage_20/centered_abs_mean": 0.07695982903242111, "signal/frontier_coverage_20/group_bin_occupancy": 0.89375, "signal/frontier_coverage_20/group_std_mean": 0.09852775484323502, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001377580827102065, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001377580827102065, "signal/frontier_coverage_25/centered_abs_mean": 0.05279005914926529, "signal/frontier_coverage_25/group_bin_occupancy": 0.92890625, "signal/frontier_coverage_25/group_std_mean": 0.06735634654760361, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009449420729652047, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009449420729652047, "signal/frontier_coverage_5/centered_abs_mean": 0.15354090929031372, "signal/frontier_coverage_5/group_bin_occupancy": 0.866796875, "signal/frontier_coverage_5/group_std_mean": 0.19501928389072418, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002748382231220603, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002748382231220603, "signal/frontier_ece_reward/centered_abs_mean": 0.004886813275516033, "signal/frontier_ece_reward/group_bin_occupancy": 0.918359375, "signal/frontier_ece_reward/group_std_mean": 0.0063082781620323655, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006108516594395041, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006108516594395041, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27488839626312256, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.717578125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35261892080307006, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03436104953289032, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03436104953289032, "step": 310 }, { "calibration/aurc": 0.2688797599130984, "calibration/batch_distribution_entropy": 0.9675266719184563, "calibration/batch_entropy_100bins": 0.9649150285306254, "calibration/batch_entropy_10bins": 0.9675266719184563, "calibration/batch_entropy_50bins": 0.9690713029080542, "calibration/batch_uniqueness": 0.951904296875, "calibration/buffer_distribution_entropy": 0.9989472583325865, "calibration/buffer_entropy_100bins": 0.9990723220281166, "calibration/buffer_entropy_10bins": 0.9989472583325865, "calibration/buffer_entropy_50bins": 0.9991024787924342, "calibration/confidence_entropy": 0.46889996447870286, "calibration/coverage@0%": 0.01953125, "calibration/coverage@1%": 0.01953125, "calibration/coverage@10%": 0.03515625, "calibration/coverage@15%": 0.130859375, "calibration/coverage@20%": 0.2314453125, "calibration/coverage@25%": 0.4111328125, "calibration/coverage@30%": 0.73046875, "calibration/coverage@5%": 0.02734375, "calibration/ece": 0.14754415918657318, "calibration/mean_confidence": 0.581622264145669, "calibration/prompt_uniqueness": 0.8184814453125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.5, "completions/max_terminated_length": 380.5, "completions/mean_length": 186.61524963378906, "completions/mean_terminated_length": 186.61524963378906, "completions/min_length": 106.5, "completions/min_terminated_length": 106.5, "epoch": 0.9984, "num_tokens": 1055012517.0, "reward": 0.8589463829994202, "reward_std": 0.08560431376099586, "rewards/accuracy_reward": 0.558837890625, "rewards/brier_reward": 0.7794443070888519, "rewards/format_reward": 1.0, "rewards/frontier_aurc_reward": -0.0029379306361079216, "rewards/frontier_coverage_1": 0.07742930576205254, "rewards/frontier_coverage_10": 0.0766817256808281, "rewards/frontier_coverage_15": 0.06769302859902382, "rewards/frontier_coverage_20": 0.04756389185786247, "rewards/frontier_coverage_25": 0.04449248127639294, "rewards/frontier_coverage_5": 0.07734929025173187, "rewards/frontier_ece_reward": 0.0017147985054180026, "rewards/frontier_entropy_batch_reward": -0.20054014027118683, "signal/accuracy_reward/centered_abs_mean": 0.0843353271484375, "signal/accuracy_reward/group_bin_occupancy": 0.1689453125, "signal/accuracy_reward/group_std_mean": 0.117337416857481, "signal/accuracy_reward/group_zero_std_frac": 0.6484375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04216766357421875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04216766357421875, "signal/advantage_abs_mean": 0.06579190492630005, "signal/advantage_pre_scale_abs_mean": 0.06579190492630005, "signal/advantage_pre_scale_std": 0.10127944126725197, "signal/advantage_std": 0.10127944126725197, "signal/brier_reward/centered_abs_mean": 0.11069391667842865, "signal/brier_reward/group_bin_occupancy": 0.86328125, "signal/brier_reward/group_std_mean": 0.13984516263008118, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013836739584803581, "signal/brier_reward/weight": 0.125, "signal/brier_reward/weighted_centered_abs_mean": 0.013836739584803581, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_bin_occupancy": 0.125, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_aurc_reward/centered_abs_mean": 0.0025345467729493976, "signal/frontier_aurc_reward/group_bin_occupancy": 0.7607421875, "signal/frontier_aurc_reward/group_std_mean": 0.004020490450784564, "signal/frontier_aurc_reward/group_zero_std_frac": 0.0, "signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.536838969215751e-05, "signal/frontier_aurc_reward/weight": 0.017899999395012856, "signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.536838969215751e-05, "signal/frontier_coverage_1/centered_abs_mean": 0.14754138886928558, "signal/frontier_coverage_1/group_bin_occupancy": 0.8544921875, "signal/frontier_coverage_1/group_std_mean": 0.1890631541609764, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026409910060465336, "signal/frontier_coverage_1/weight": 0.017899999395012856, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026409910060465336, "signal/frontier_coverage_10/centered_abs_mean": 0.14654190093278885, "signal/frontier_coverage_10/group_bin_occupancy": 0.8544921875, "signal/frontier_coverage_10/group_std_mean": 0.1877775639295578, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002623099833726883, "signal/frontier_coverage_10/weight": 0.017899999395012856, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002623099833726883, "signal/frontier_coverage_15/centered_abs_mean": 0.13195443153381348, "signal/frontier_coverage_15/group_bin_occupancy": 0.8564453125, "signal/frontier_coverage_15/group_std_mean": 0.16884687542915344, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023619841085746884, "signal/frontier_coverage_15/weight": 0.017899999395012856, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023619841085746884, "signal/frontier_coverage_20/centered_abs_mean": 0.07070561498403549, "signal/frontier_coverage_20/group_bin_occupancy": 0.8916015625, "signal/frontier_coverage_20/group_std_mean": 0.09012233838438988, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001265630533453077, "signal/frontier_coverage_20/weight": 0.017899999395012856, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001265630533453077, "signal/frontier_coverage_25/centered_abs_mean": 0.04924464598298073, "signal/frontier_coverage_25/group_bin_occupancy": 0.92578125, "signal/frontier_coverage_25/group_std_mean": 0.06359815411269665, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008814791508484632, "signal/frontier_coverage_25/weight": 0.017899999395012856, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008814791508484632, "signal/frontier_coverage_5/centered_abs_mean": 0.1474618911743164, "signal/frontier_coverage_5/group_bin_occupancy": 0.85546875, "signal/frontier_coverage_5/group_std_mean": 0.18895908445119858, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026395675959065557, "signal/frontier_coverage_5/weight": 0.017899999395012856, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026395675959065557, "signal/frontier_ece_reward/centered_abs_mean": 0.004972347756847739, "signal/frontier_ece_reward/group_bin_occupancy": 0.9130859375, "signal/frontier_ece_reward/group_std_mean": 0.006475380156189203, "signal/frontier_ece_reward/group_zero_std_frac": 0.0, "signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006215434696059674, "signal/frontier_ece_reward/weight": 0.125, "signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006215434696059674, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2751055657863617, "signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.712890625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3516087681055069, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03438819572329521, "signal/frontier_entropy_batch_reward/weight": 0.125, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03438819572329521, "step": 312, "total_flos": 0.0, "train_loss": 0.0041191707219135985, "train_runtime": 59190.2375, "train_samples_per_second": 0.338, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1055012517, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }