Files
RLCR-v4-ks-bins100-ece100-h…/trainer_state.json
ModelHub XC c346f0c519 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-bins100-ece100-hotpot
Source: Original Platform
2026-04-11 03:10:58 +08:00

9940 lines
609 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.6224643780433982,
"calibration/batch_distribution_entropy": 0.6544897379113672,
"calibration/batch_entropy_100bins": 0.4863029185249278,
"calibration/batch_entropy_10bins": 0.6544897379113672,
"calibration/batch_entropy_50bins": 0.5701004408606952,
"calibration/batch_uniqueness": 0.728444991952043,
"calibration/confidence_entropy": 0.34767197334474165,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4841265820092806,
"calibration/mean_confidence": 0.7931767989389904,
"calibration/prompt_uniqueness": 0.6103076405494752,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0353515625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1506.0,
"completions/mean_length": 271.31015625,
"completions/mean_terminated_length": 224.96051940917968,
"completions/min_length": 1.8,
"completions/min_terminated_length": 1.8,
"epoch": 0.016,
"grad_norm": 0.053285811096429825,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.0705,
"num_tokens": 17622248.0,
"reward": 0.49289684891700747,
"reward_std": 0.3958591163158417,
"rewards/accuracy_reward": 0.2240234375,
"rewards/brier_reward": 0.37564998865127563,
"rewards/format_reward": 0.67802734375,
"rewards/frontier_aurc_reward": 0.3027165472507477,
"rewards/frontier_coverage_1": 0.3027165472507477,
"rewards/frontier_coverage_10": 0.3027165472507477,
"rewards/frontier_coverage_15": 0.3027165472507477,
"rewards/frontier_coverage_20": 0.3027165472507477,
"rewards/frontier_coverage_25": 0.3027165472507477,
"rewards/frontier_coverage_5": 0.3027165472507477,
"rewards/frontier_ece_reward": 0.3027165472507477,
"rewards/frontier_entropy_batch_reward": -0.6468378663063049,
"signal/accuracy_reward/centered_abs_mean": 0.24012451171875,
"signal/accuracy_reward/group_bin_occupancy": 0.210546875,
"signal/accuracy_reward/group_std_mean": 0.2819916486740112,
"signal/accuracy_reward/group_zero_std_frac": 0.315625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.120062255859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.120062255859375,
"signal/advantage_abs_mean": 0.33822785019874574,
"signal/advantage_pre_scale_abs_mean": 0.33822785019874574,
"signal/advantage_pre_scale_std": 0.40998163223266604,
"signal/advantage_std": 0.40998163223266604,
"signal/brier_reward/centered_abs_mean": 0.320052570104599,
"signal/brier_reward/group_bin_occupancy": 0.745703125,
"signal/brier_reward/group_std_mean": 0.36434565782546996,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.04000657126307487,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.04000657126307487,
"signal/format_reward/centered_abs_mean": 0.408428955078125,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.45669829845428467,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.2042144775390625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.2042144775390625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.29173809885978697,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.662890625,
"signal/frontier_aurc_reward/group_std_mean": 0.34154740571975706,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.003125,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_1/centered_abs_mean": 0.29173809885978697,
"signal/frontier_coverage_1/group_bin_occupancy": 0.662890625,
"signal/frontier_coverage_1/group_std_mean": 0.34154740571975706,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_10/centered_abs_mean": 0.29173809885978697,
"signal/frontier_coverage_10/group_bin_occupancy": 0.662890625,
"signal/frontier_coverage_10/group_std_mean": 0.34154740571975706,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_15/centered_abs_mean": 0.29173809885978697,
"signal/frontier_coverage_15/group_bin_occupancy": 0.662890625,
"signal/frontier_coverage_15/group_std_mean": 0.34154740571975706,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_20/centered_abs_mean": 0.29173809885978697,
"signal/frontier_coverage_20/group_bin_occupancy": 0.662890625,
"signal/frontier_coverage_20/group_std_mean": 0.34154740571975706,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_25/centered_abs_mean": 0.29173809885978697,
"signal/frontier_coverage_25/group_bin_occupancy": 0.662890625,
"signal/frontier_coverage_25/group_std_mean": 0.34154740571975706,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_5/centered_abs_mean": 0.29173809885978697,
"signal/frontier_coverage_5/group_bin_occupancy": 0.662890625,
"signal/frontier_coverage_5/group_std_mean": 0.34154740571975706,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005222111754119396,
"signal/frontier_ece_reward/centered_abs_mean": 0.29173809885978697,
"signal/frontier_ece_reward/group_bin_occupancy": 0.662890625,
"signal/frontier_ece_reward/group_std_mean": 0.34154740571975706,
"signal/frontier_ece_reward/group_zero_std_frac": 0.003125,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.03646726235747337,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.03646726235747337,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4318214237689972,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.308203125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4762145817279816,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.05397767797112465,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.05397767797112465,
"step": 5
},
{
"calibration/aurc": 0.6528287664498039,
"calibration/batch_distribution_entropy": 0.6268190631519651,
"calibration/batch_entropy_100bins": 0.4738420714903396,
"calibration/batch_entropy_10bins": 0.6268190631519651,
"calibration/batch_entropy_50bins": 0.554670162377654,
"calibration/batch_uniqueness": 0.705548191421581,
"calibration/confidence_entropy": 0.33243235077831834,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5101970205206909,
"calibration/mean_confidence": 0.7973768305353158,
"calibration/prompt_uniqueness": 0.5841085893747472,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.037109375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1523.4,
"completions/mean_length": 266.923828125,
"completions/mean_terminated_length": 218.05512084960938,
"completions/min_length": 2.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.021607212722301483,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0713,
"num_tokens": 35455900.0,
"reward": 0.5007035851478576,
"reward_std": 0.378324830532074,
"rewards/accuracy_reward": 0.2111328125,
"rewards/brier_reward": 0.37633253931999205,
"rewards/format_reward": 0.71708984375,
"rewards/frontier_aurc_reward": 0.2987588942050934,
"rewards/frontier_coverage_1": 0.2987588942050934,
"rewards/frontier_coverage_10": 0.2987588942050934,
"rewards/frontier_coverage_15": 0.2987588942050934,
"rewards/frontier_coverage_20": 0.2987588942050934,
"rewards/frontier_coverage_25": 0.2987588942050934,
"rewards/frontier_coverage_5": 0.2987588942050934,
"rewards/frontier_ece_reward": 0.2987588942050934,
"rewards/frontier_entropy_batch_reward": -0.6818291902542114,
"signal/accuracy_reward/centered_abs_mean": 0.22430419921875,
"signal/accuracy_reward/group_bin_occupancy": 0.209765625,
"signal/accuracy_reward/group_std_mean": 0.2700383305549622,
"signal/accuracy_reward/group_zero_std_frac": 0.321875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.112152099609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.112152099609375,
"signal/advantage_abs_mean": 0.3131078124046326,
"signal/advantage_pre_scale_abs_mean": 0.3131078124046326,
"signal/advantage_pre_scale_std": 0.3932444155216217,
"signal/advantage_std": 0.3932444155216217,
"signal/brier_reward/centered_abs_mean": 0.3091658055782318,
"signal/brier_reward/group_bin_occupancy": 0.756640625,
"signal/brier_reward/group_std_mean": 0.3574398994445801,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.038645725697278976,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.038645725697278976,
"signal/format_reward/centered_abs_mean": 0.377227783203125,
"signal/format_reward/group_bin_occupancy": 0.25,
"signal/format_reward/group_std_mean": 0.43797464966773986,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.1886138916015625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1886138916015625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.2825876474380493,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.67109375,
"signal/frontier_aurc_reward/group_std_mean": 0.33760352730751036,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_1/centered_abs_mean": 0.2825876474380493,
"signal/frontier_coverage_1/group_bin_occupancy": 0.67109375,
"signal/frontier_coverage_1/group_std_mean": 0.33760352730751036,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_10/centered_abs_mean": 0.2825876474380493,
"signal/frontier_coverage_10/group_bin_occupancy": 0.67109375,
"signal/frontier_coverage_10/group_std_mean": 0.33760352730751036,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_15/centered_abs_mean": 0.2825876474380493,
"signal/frontier_coverage_15/group_bin_occupancy": 0.67109375,
"signal/frontier_coverage_15/group_std_mean": 0.33760352730751036,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_20/centered_abs_mean": 0.2825876474380493,
"signal/frontier_coverage_20/group_bin_occupancy": 0.67109375,
"signal/frontier_coverage_20/group_std_mean": 0.33760352730751036,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_25/centered_abs_mean": 0.2825876474380493,
"signal/frontier_coverage_25/group_bin_occupancy": 0.67109375,
"signal/frontier_coverage_25/group_std_mean": 0.33760352730751036,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_5/centered_abs_mean": 0.2825876474380493,
"signal/frontier_coverage_5/group_bin_occupancy": 0.67109375,
"signal/frontier_coverage_5/group_std_mean": 0.33760352730751036,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005058318562805653,
"signal/frontier_ece_reward/centered_abs_mean": 0.2825876474380493,
"signal/frontier_ece_reward/group_bin_occupancy": 0.67109375,
"signal/frontier_ece_reward/group_std_mean": 0.33760352730751036,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.035323455929756165,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.035323455929756165,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4059325873851776,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.315625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4605302751064301,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0507415734231472,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0507415734231472,
"step": 10
},
{
"calibration/aurc": 0.616971247492587,
"calibration/batch_distribution_entropy": 0.6409075565843491,
"calibration/batch_entropy_100bins": 0.4772482403805065,
"calibration/batch_entropy_10bins": 0.6409075565843491,
"calibration/batch_entropy_50bins": 0.5566394338845917,
"calibration/batch_uniqueness": 0.7049275200846712,
"calibration/buffer_distribution_entropy": 0.6588770403392903,
"calibration/buffer_entropy_100bins": 0.49200675404876176,
"calibration/buffer_entropy_10bins": 0.6588770403392903,
"calibration/buffer_entropy_50bins": 0.5748770253713922,
"calibration/confidence_entropy": 0.34808615842826207,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4785652202870275,
"calibration/mean_confidence": 0.8042481872309974,
"calibration/prompt_uniqueness": 0.6151282665489992,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.015625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1420.2,
"completions/mean_length": 204.5537109375,
"completions/mean_terminated_length": 183.505908203125,
"completions/min_length": 3.2,
"completions/min_terminated_length": 3.2,
"epoch": 0.048,
"grad_norm": 0.011996953748166561,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0439,
"num_tokens": 52599266.0,
"reward": 0.608043098449707,
"reward_std": 0.30430689454078674,
"rewards/accuracy_reward": 0.274609375,
"rewards/brier_reward": 0.4852728068828583,
"rewards/format_reward": 0.8759765625,
"rewards/frontier_aurc_reward": 0.30132074588909746,
"rewards/frontier_coverage_1": 0.3177640035748482,
"rewards/frontier_coverage_10": 0.3177640035748482,
"rewards/frontier_coverage_15": 0.3177640035748482,
"rewards/frontier_coverage_20": 0.3177640035748482,
"rewards/frontier_coverage_25": 0.3177640035748482,
"rewards/frontier_coverage_5": 0.3177640035748482,
"rewards/frontier_ece_reward": 0.2919433981180191,
"rewards/frontier_entropy_batch_reward": -0.8313869953155517,
"signal/accuracy_reward/centered_abs_mean": 0.2015869140625,
"signal/accuracy_reward/group_bin_occupancy": 0.2046875,
"signal/accuracy_reward/group_std_mean": 0.2485917925834656,
"signal/accuracy_reward/group_zero_std_frac": 0.3625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10079345703125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10079345703125,
"signal/advantage_abs_mean": 0.2351256161928177,
"signal/advantage_pre_scale_abs_mean": 0.2351256161928177,
"signal/advantage_pre_scale_std": 0.31978016495704653,
"signal/advantage_std": 0.31978016495704653,
"signal/brier_reward/centered_abs_mean": 0.2747634917497635,
"signal/brier_reward/group_bin_occupancy": 0.794921875,
"signal/brier_reward/group_std_mean": 0.330036336183548,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.03434543646872044,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.03434543646872044,
"signal/format_reward/centered_abs_mean": 0.20308837890625,
"signal/format_reward/group_bin_occupancy": 0.24375,
"signal/format_reward/group_std_mean": 0.3047700166702271,
"signal/format_reward/group_zero_std_frac": 0.05,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.101544189453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.101544189453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.21934852562844753,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.735546875,
"signal/frontier_aurc_reward/group_std_mean": 0.2648093054071069,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.003926338179735467,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.003926338179735467,
"signal/frontier_coverage_1/centered_abs_mean": 0.23966625183820725,
"signal/frontier_coverage_1/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_1/group_std_mean": 0.29589507579803465,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_10/centered_abs_mean": 0.23966625183820725,
"signal/frontier_coverage_10/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_10/group_std_mean": 0.29589507579803465,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_15/centered_abs_mean": 0.23966625183820725,
"signal/frontier_coverage_15/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_15/group_std_mean": 0.29589507579803465,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_20/centered_abs_mean": 0.23966625183820725,
"signal/frontier_coverage_20/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_20/group_std_mean": 0.29589507579803465,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_25/centered_abs_mean": 0.23966625183820725,
"signal/frontier_coverage_25/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_25/group_std_mean": 0.29589507579803465,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_5/centered_abs_mean": 0.23966625183820725,
"signal/frontier_coverage_5/group_bin_occupancy": 0.708984375,
"signal/frontier_coverage_5/group_std_mean": 0.29589507579803465,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004290025448426604,
"signal/frontier_ece_reward/centered_abs_mean": 0.2458495855331421,
"signal/frontier_ece_reward/group_bin_occupancy": 0.716796875,
"signal/frontier_ece_reward/group_std_mean": 0.29648907482624054,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.030731198191642762,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.030731198191642762,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2641173452138901,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.340625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3699604392051697,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.015625,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03301466815173626,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03301466815173626,
"step": 15
},
{
"calibration/aurc": 0.53447438580555,
"calibration/batch_distribution_entropy": 0.697833452595151,
"calibration/batch_entropy_100bins": 0.5162966612875538,
"calibration/batch_entropy_10bins": 0.697833452595151,
"calibration/batch_entropy_50bins": 0.601463977565162,
"calibration/batch_uniqueness": 0.7540542644025435,
"calibration/buffer_distribution_entropy": 0.6562396115815237,
"calibration/buffer_entropy_100bins": 0.49265615739430785,
"calibration/buffer_entropy_10bins": 0.6562396115815237,
"calibration/buffer_entropy_50bins": 0.5747150103358988,
"calibration/confidence_entropy": 0.361725120029885,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3901462729077969,
"calibration/mean_confidence": 0.7812206546712639,
"calibration/prompt_uniqueness": 0.6691434151673705,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.004296875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 1216.6,
"completions/mean_length": 143.82763671875,
"completions/mean_terminated_length": 137.8384002685547,
"completions/min_length": 34.0,
"completions/min_terminated_length": 34.0,
"epoch": 0.064,
"grad_norm": 0.0032386924140155315,
"learning_rate": 1e-06,
"loss": 0.0098,
"num_tokens": 68990461.0,
"reward": 0.6213819026947022,
"reward_std": 0.20088508129119872,
"rewards/accuracy_reward": 0.3435546875,
"rewards/brier_reward": 0.5725475311279297,
"rewards/format_reward": 0.980078125,
"rewards/frontier_aurc_reward": -0.007014566846191883,
"rewards/frontier_coverage_1": 0.06326824426651001,
"rewards/frontier_coverage_10": 0.06326824426651001,
"rewards/frontier_coverage_15": 0.06326824426651001,
"rewards/frontier_coverage_20": 0.06326824426651001,
"rewards/frontier_coverage_25": 0.06326824426651001,
"rewards/frontier_coverage_5": 0.06326824426651001,
"rewards/frontier_ece_reward": -0.046458789124153556,
"rewards/frontier_entropy_batch_reward": -0.9029202818870544,
"signal/accuracy_reward/centered_abs_mean": 0.2005615234375,
"signal/accuracy_reward/group_bin_occupancy": 0.207421875,
"signal/accuracy_reward/group_std_mean": 0.25139918029308317,
"signal/accuracy_reward/group_zero_std_frac": 0.340625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.10028076171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.10028076171875,
"signal/advantage_abs_mean": 0.15537169873714446,
"signal/advantage_pre_scale_abs_mean": 0.15537169873714446,
"signal/advantage_pre_scale_std": 0.21755909621715547,
"signal/advantage_std": 0.21755909621715547,
"signal/brier_reward/centered_abs_mean": 0.24199655055999755,
"signal/brier_reward/group_bin_occupancy": 0.828515625,
"signal/brier_reward/group_std_mean": 0.298342889547348,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030249568819999694,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.030249568819999694,
"signal/format_reward/centered_abs_mean": 0.03763427734375,
"signal/format_reward/group_bin_occupancy": 0.18203125,
"signal/format_reward/group_std_mean": 0.0920264482498169,
"signal/format_reward/group_zero_std_frac": 0.54375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.018817138671875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.018817138671875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.005011124256998301,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.75546875,
"signal/frontier_aurc_reward/group_std_mean": 0.006882566865533591,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 8.969911868916824e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 8.969911868916824e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.10851092785596847,
"signal/frontier_coverage_1/group_bin_occupancy": 0.6796875,
"signal/frontier_coverage_1/group_std_mean": 0.16966440081596373,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_10/centered_abs_mean": 0.10851092785596847,
"signal/frontier_coverage_10/group_bin_occupancy": 0.6796875,
"signal/frontier_coverage_10/group_std_mean": 0.16966440081596373,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_15/centered_abs_mean": 0.10851092785596847,
"signal/frontier_coverage_15/group_bin_occupancy": 0.6796875,
"signal/frontier_coverage_15/group_std_mean": 0.16966440081596373,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_20/centered_abs_mean": 0.10851092785596847,
"signal/frontier_coverage_20/group_bin_occupancy": 0.6796875,
"signal/frontier_coverage_20/group_std_mean": 0.16966440081596373,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_25/centered_abs_mean": 0.10851092785596847,
"signal/frontier_coverage_25/group_bin_occupancy": 0.6796875,
"signal/frontier_coverage_25/group_std_mean": 0.16966440081596373,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_5/centered_abs_mean": 0.10851092785596847,
"signal/frontier_coverage_5/group_bin_occupancy": 0.6796875,
"signal/frontier_coverage_5/group_std_mean": 0.16966440081596373,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019423455698415637,
"signal/frontier_ece_reward/centered_abs_mean": 0.14095230400562286,
"signal/frontier_ece_reward/group_bin_occupancy": 0.76015625,
"signal/frontier_ece_reward/group_std_mean": 0.16866419315338135,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.017619038000702858,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.017619038000702858,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.16997582614421844,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.358984375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.2987139880657196,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.071875,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.021246978268027305,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021246978268027305,
"step": 20
},
{
"calibration/aurc": 0.6205064937493211,
"calibration/batch_distribution_entropy": 0.8188591589225596,
"calibration/batch_entropy_100bins": 0.6092270818068604,
"calibration/batch_entropy_10bins": 0.8188591589225596,
"calibration/batch_entropy_50bins": 0.6969438827371384,
"calibration/batch_uniqueness": 0.8361927117817313,
"calibration/buffer_distribution_entropy": 0.6856559337312504,
"calibration/buffer_entropy_100bins": 0.5146458039959066,
"calibration/buffer_entropy_10bins": 0.6856559337312504,
"calibration/buffer_entropy_50bins": 0.5978699532757095,
"calibration/confidence_entropy": 0.4192333208619353,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.3946832247035178,
"calibration/mean_confidence": 0.705443334403485,
"calibration/prompt_uniqueness": 0.7620859228603916,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00126953125,
"completions/max_length": 1378.0,
"completions/max_terminated_length": 926.6,
"completions/mean_length": 121.5697265625,
"completions/mean_terminated_length": 119.77201538085937,
"completions/min_length": 28.2,
"completions/min_terminated_length": 28.2,
"epoch": 0.08,
"grad_norm": 0.012476031668484211,
"learning_rate": 1e-06,
"loss": 0.0025,
"num_tokens": 85168487.0,
"reward": 0.6544445514678955,
"reward_std": 0.18224802613258362,
"rewards/accuracy_reward": 0.36328125,
"rewards/brier_reward": 0.6255658030509949,
"rewards/format_reward": 0.99404296875,
"rewards/frontier_aurc_reward": -0.006063262652605772,
"rewards/frontier_coverage_1": 0.07632581368088723,
"rewards/frontier_coverage_10": 0.07632581368088723,
"rewards/frontier_coverage_15": 0.07632581368088723,
"rewards/frontier_coverage_20": 0.07632581368088723,
"rewards/frontier_coverage_25": 0.07632581368088723,
"rewards/frontier_coverage_5": 0.07632581368088723,
"rewards/frontier_ece_reward": -0.03705122843384743,
"rewards/frontier_entropy_batch_reward": -0.8469658613204956,
"signal/accuracy_reward/centered_abs_mean": 0.188232421875,
"signal/accuracy_reward/group_bin_occupancy": 0.203125,
"signal/accuracy_reward/group_std_mean": 0.2347244828939438,
"signal/accuracy_reward/group_zero_std_frac": 0.375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0941162109375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0941162109375,
"signal/advantage_abs_mean": 0.14390270113945008,
"signal/advantage_pre_scale_abs_mean": 0.14390270113945008,
"signal/advantage_pre_scale_std": 0.1975090980529785,
"signal/advantage_std": 0.1975090980529785,
"signal/brier_reward/centered_abs_mean": 0.23567027747631072,
"signal/brier_reward/group_bin_occupancy": 0.867578125,
"signal/brier_reward/group_std_mean": 0.28802819848060607,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02945878468453884,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02945878468453884,
"signal/format_reward/centered_abs_mean": 0.011297607421875,
"signal/format_reward/group_bin_occupancy": 0.144921875,
"signal/format_reward/group_std_mean": 0.029941194131970404,
"signal/format_reward/group_zero_std_frac": 0.840625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0056488037109375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0056488037109375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0037031634245067837,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.751953125,
"signal/frontier_aurc_reward/group_std_mean": 0.005138655751943588,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.628662376897409e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.628662376897409e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1505482792854309,
"signal/frontier_coverage_1/group_bin_occupancy": 0.77265625,
"signal/frontier_coverage_1/group_std_mean": 0.21810686886310576,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_10/centered_abs_mean": 0.1505482792854309,
"signal/frontier_coverage_10/group_bin_occupancy": 0.77265625,
"signal/frontier_coverage_10/group_std_mean": 0.21810686886310576,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_15/centered_abs_mean": 0.1505482792854309,
"signal/frontier_coverage_15/group_bin_occupancy": 0.77265625,
"signal/frontier_coverage_15/group_std_mean": 0.21810686886310576,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_20/centered_abs_mean": 0.1505482792854309,
"signal/frontier_coverage_20/group_bin_occupancy": 0.77265625,
"signal/frontier_coverage_20/group_std_mean": 0.21810686886310576,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_25/centered_abs_mean": 0.1505482792854309,
"signal/frontier_coverage_25/group_bin_occupancy": 0.77265625,
"signal/frontier_coverage_25/group_std_mean": 0.21810686886310576,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_5/centered_abs_mean": 0.1505482792854309,
"signal/frontier_coverage_5/group_bin_occupancy": 0.77265625,
"signal/frontier_coverage_5/group_std_mean": 0.21810686886310576,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002694814093410969,
"signal/frontier_ece_reward/centered_abs_mean": 0.1332566112279892,
"signal/frontier_ece_reward/group_bin_occupancy": 0.82265625,
"signal/frontier_ece_reward/group_std_mean": 0.16948509812355042,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.01665707640349865,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.01665707640349865,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2531877249479294,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.450390625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3905863881111145,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.021875,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.031648465618491176,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031648465618491176,
"step": 25
},
{
"calibration/aurc": 0.6337002880818469,
"calibration/batch_distribution_entropy": 0.9515234692633457,
"calibration/batch_entropy_100bins": 0.7790651280496668,
"calibration/batch_entropy_10bins": 0.9515234692633457,
"calibration/batch_entropy_50bins": 0.848934936739482,
"calibration/batch_uniqueness": 0.909145581071251,
"calibration/buffer_distribution_entropy": 0.7460870471656004,
"calibration/buffer_entropy_100bins": 0.565529106007607,
"calibration/buffer_entropy_10bins": 0.7460870471656004,
"calibration/buffer_entropy_50bins": 0.6492569955197386,
"calibration/confidence_entropy": 0.49321506704742396,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.29262828029023924,
"calibration/mean_confidence": 0.5433459062155098,
"calibration/prompt_uniqueness": 0.8469597941687622,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0021484375,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 929.8,
"completions/mean_length": 118.1986328125,
"completions/mean_terminated_length": 115.14469146728516,
"completions/min_length": 38.6,
"completions/min_terminated_length": 38.6,
"epoch": 0.096,
"grad_norm": 0.0032025109976530075,
"learning_rate": 1e-06,
"loss": 0.0048,
"num_tokens": 101423449.0,
"reward": 0.6895796895027161,
"reward_std": 0.17854192554950715,
"rewards/accuracy_reward": 0.35771484375,
"rewards/brier_reward": 0.6795345783233643,
"rewards/format_reward": 0.99248046875,
"rewards/frontier_aurc_reward": -0.005398123059421778,
"rewards/frontier_coverage_1": 0.10967106521129608,
"rewards/frontier_coverage_10": 0.10967106521129608,
"rewards/frontier_coverage_15": 0.10967106521129608,
"rewards/frontier_coverage_20": 0.10967106521129608,
"rewards/frontier_coverage_25": 0.10967106521129608,
"rewards/frontier_coverage_5": 0.10967106521129608,
"rewards/frontier_ece_reward": -0.026541496440768243,
"rewards/frontier_entropy_batch_reward": -0.6305931687355042,
"signal/accuracy_reward/centered_abs_mean": 0.189324951171875,
"signal/accuracy_reward/group_bin_occupancy": 0.20390625,
"signal/accuracy_reward/group_std_mean": 0.2379360795021057,
"signal/accuracy_reward/group_zero_std_frac": 0.36875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0946624755859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0946624755859375,
"signal/advantage_abs_mean": 0.1388890862464905,
"signal/advantage_pre_scale_abs_mean": 0.1388890862464905,
"signal/advantage_pre_scale_std": 0.1868252784013748,
"signal/advantage_std": 0.1868252784013748,
"signal/brier_reward/centered_abs_mean": 0.2370523989200592,
"signal/brier_reward/group_bin_occupancy": 0.90390625,
"signal/brier_reward/group_std_mean": 0.288687926530838,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0296315498650074,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.0296315498650074,
"signal/format_reward/centered_abs_mean": 0.014483642578125,
"signal/format_reward/group_bin_occupancy": 0.15234375,
"signal/format_reward/group_std_mean": 0.04018273241817951,
"signal/format_reward/group_zero_std_frac": 0.78125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0072418212890625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0072418212890625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002838827669620514,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.723828125,
"signal/frontier_aurc_reward/group_std_mean": 0.004378228541463613,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.0815014401450756e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.0815014401450756e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.23764651417732238,
"signal/frontier_coverage_1/group_bin_occupancy": 0.908984375,
"signal/frontier_coverage_1/group_std_mean": 0.3083926856517792,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_10/centered_abs_mean": 0.23764651417732238,
"signal/frontier_coverage_10/group_bin_occupancy": 0.908984375,
"signal/frontier_coverage_10/group_std_mean": 0.3083926856517792,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_15/centered_abs_mean": 0.23764651417732238,
"signal/frontier_coverage_15/group_bin_occupancy": 0.908984375,
"signal/frontier_coverage_15/group_std_mean": 0.3083926856517792,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_20/centered_abs_mean": 0.23764651417732238,
"signal/frontier_coverage_20/group_bin_occupancy": 0.908984375,
"signal/frontier_coverage_20/group_std_mean": 0.3083926856517792,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_25/centered_abs_mean": 0.23764651417732238,
"signal/frontier_coverage_25/group_bin_occupancy": 0.908984375,
"signal/frontier_coverage_25/group_std_mean": 0.3083926856517792,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_5/centered_abs_mean": 0.23764651417732238,
"signal/frontier_coverage_5/group_bin_occupancy": 0.908984375,
"signal/frontier_coverage_5/group_std_mean": 0.3083926856517792,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004253872437402606,
"signal/frontier_ece_reward/centered_abs_mean": 0.11303882747888565,
"signal/frontier_ece_reward/group_bin_occupancy": 0.83671875,
"signal/frontier_ece_reward/group_std_mean": 0.15580815970897674,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.014129853434860706,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.014129853434860706,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4395421028137207,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.62890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5271062850952148,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.054942762851715087,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.054942762851715087,
"step": 30
},
{
"calibration/aurc": 0.511020845461892,
"calibration/batch_distribution_entropy": 0.9269066170510761,
"calibration/batch_entropy_100bins": 0.9245069542553294,
"calibration/batch_entropy_10bins": 0.9269066170510761,
"calibration/batch_entropy_50bins": 0.9373367432265832,
"calibration/batch_uniqueness": 0.9446476489301399,
"calibration/buffer_distribution_entropy": 0.829412139862835,
"calibration/buffer_entropy_100bins": 0.6628959346066731,
"calibration/buffer_entropy_10bins": 0.829412139862835,
"calibration/buffer_entropy_50bins": 0.7391661237028551,
"calibration/confidence_entropy": 0.5010959880267044,
"calibration/coverage@0%": 0.004707438321562174,
"calibration/coverage@1%": 0.004707438321562174,
"calibration/coverage@10%": 0.004707438321562174,
"calibration/coverage@15%": 0.005100365630010111,
"calibration/coverage@20%": 0.00745639160473512,
"calibration/coverage@25%": 0.01687046764772599,
"calibration/coverage@30%": 0.0360861539222358,
"calibration/coverage@5%": 0.004707438321562174,
"calibration/ece": 0.19524205952621834,
"calibration/mean_confidence": 0.3591607106056175,
"calibration/prompt_uniqueness": 0.8846453539155539,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00185546875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 867.0,
"completions/mean_length": 117.60078125,
"completions/mean_terminated_length": 114.96354064941406,
"completions/min_length": 42.0,
"completions/min_terminated_length": 42.0,
"epoch": 0.112,
"grad_norm": 0.002222000854089856,
"learning_rate": 1e-06,
"loss": 0.0057,
"num_tokens": 117737153.0,
"reward": 0.7422285556793213,
"reward_std": 0.14107392430305482,
"rewards/accuracy_reward": 0.387109375,
"rewards/brier_reward": 0.7211146593093872,
"rewards/format_reward": 0.99482421875,
"rewards/frontier_aurc_reward": -0.0045266709290444854,
"rewards/frontier_coverage_1": 0.13778235018253326,
"rewards/frontier_coverage_10": 0.13778235018253326,
"rewards/frontier_coverage_15": 0.13778235018253326,
"rewards/frontier_coverage_20": 0.13778235018253326,
"rewards/frontier_coverage_25": 0.13778235018253326,
"rewards/frontier_coverage_5": 0.13778235018253326,
"rewards/frontier_ece_reward": -0.0034593752585351466,
"rewards/frontier_entropy_batch_reward": -0.42529548406600953,
"signal/accuracy_reward/centered_abs_mean": 0.190576171875,
"signal/accuracy_reward/group_bin_occupancy": 0.20546875,
"signal/accuracy_reward/group_std_mean": 0.24057506322860717,
"signal/accuracy_reward/group_zero_std_frac": 0.35625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0952880859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0952880859375,
"signal/advantage_abs_mean": 0.10944210141897201,
"signal/advantage_pre_scale_abs_mean": 0.10944210141897201,
"signal/advantage_pre_scale_std": 0.1517003059387207,
"signal/advantage_std": 0.1517003059387207,
"signal/brier_reward/centered_abs_mean": 0.20343652367591858,
"signal/brier_reward/group_bin_occupancy": 0.880078125,
"signal/brier_reward/group_std_mean": 0.2546698063611984,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.025429565459489822,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.025429565459489822,
"signal/format_reward/centered_abs_mean": 0.009954833984375,
"signal/format_reward/group_bin_occupancy": 0.14375,
"signal/format_reward/group_std_mean": 0.027537884190678596,
"signal/format_reward/group_zero_std_frac": 0.85,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0049774169921875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0049774169921875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0012978114187717437,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72578125,
"signal/frontier_aurc_reward/group_std_mean": 0.0021451528184115885,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3230824081110767e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3230824081110767e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.317331862449646,
"signal/frontier_coverage_1/group_bin_occupancy": 0.941796875,
"signal/frontier_coverage_1/group_std_mean": 0.3929042756557465,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_10/centered_abs_mean": 0.317331862449646,
"signal/frontier_coverage_10/group_bin_occupancy": 0.941796875,
"signal/frontier_coverage_10/group_std_mean": 0.3929042756557465,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_15/centered_abs_mean": 0.317331862449646,
"signal/frontier_coverage_15/group_bin_occupancy": 0.941796875,
"signal/frontier_coverage_15/group_std_mean": 0.3929042756557465,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_20/centered_abs_mean": 0.317331862449646,
"signal/frontier_coverage_20/group_bin_occupancy": 0.941796875,
"signal/frontier_coverage_20/group_std_mean": 0.3929042756557465,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_25/centered_abs_mean": 0.317331862449646,
"signal/frontier_coverage_25/group_bin_occupancy": 0.941796875,
"signal/frontier_coverage_25/group_std_mean": 0.3929042756557465,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_5/centered_abs_mean": 0.317331862449646,
"signal/frontier_coverage_5/group_bin_occupancy": 0.941796875,
"signal/frontier_coverage_5/group_std_mean": 0.3929042756557465,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005680239945650101,
"signal/frontier_ece_reward/centered_abs_mean": 0.05593574643135071,
"signal/frontier_ece_reward/group_bin_occupancy": 0.801953125,
"signal/frontier_ece_reward/group_std_mean": 0.08778993785381317,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006991968303918838,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006991968303918838,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.448394775390625,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5047555208206177,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.056049346923828125,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.056049346923828125,
"step": 35
},
{
"calibration/aurc": 0.5883847168275128,
"calibration/batch_distribution_entropy": 0.91090173474976,
"calibration/batch_entropy_100bins": 0.931093997601525,
"calibration/batch_entropy_10bins": 0.91090173474976,
"calibration/batch_entropy_50bins": 0.9358617585488869,
"calibration/batch_uniqueness": 0.9426319952686615,
"calibration/buffer_distribution_entropy": 0.893493430028969,
"calibration/buffer_entropy_100bins": 0.7499790530020946,
"calibration/buffer_entropy_10bins": 0.893493430028969,
"calibration/buffer_entropy_50bins": 0.8144149519247129,
"calibration/confidence_entropy": 0.5064471270952413,
"calibration/coverage@0%": 0.002359110808594898,
"calibration/coverage@1%": 0.002359110808594898,
"calibration/coverage@10%": 0.002359110808594898,
"calibration/coverage@15%": 0.002359110808594898,
"calibration/coverage@20%": 0.005897777021487245,
"calibration/coverage@25%": 0.005897777021487245,
"calibration/coverage@30%": 0.006290704329935182,
"calibration/coverage@5%": 0.002359110808594898,
"calibration/ece": 0.19177551118694733,
"calibration/mean_confidence": 0.3377996746346041,
"calibration/prompt_uniqueness": 0.8836925240634755,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00283203125,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 896.0,
"completions/mean_length": 122.0451171875,
"completions/mean_terminated_length": 118.02766723632813,
"completions/min_length": 40.2,
"completions/min_terminated_length": 40.2,
"epoch": 0.128,
"grad_norm": 0.0016771440859884024,
"learning_rate": 1e-06,
"loss": 0.0077,
"num_tokens": 133903567.0,
"reward": 0.7402719259262085,
"reward_std": 0.13263684809207915,
"rewards/accuracy_reward": 0.380078125,
"rewards/brier_reward": 0.7196099877357482,
"rewards/format_reward": 0.9953125,
"rewards/frontier_aurc_reward": -0.0044641831889748575,
"rewards/frontier_coverage_1": 0.1357348829507828,
"rewards/frontier_coverage_10": 0.1357348829507828,
"rewards/frontier_coverage_15": 0.1357348829507828,
"rewards/frontier_coverage_20": 0.1357348829507828,
"rewards/frontier_coverage_25": 0.1357348829507828,
"rewards/frontier_coverage_5": 0.1357348829507828,
"rewards/frontier_ece_reward": 0.0004683260805904865,
"rewards/frontier_entropy_batch_reward": -0.4154496967792511,
"signal/accuracy_reward/centered_abs_mean": 0.174169921875,
"signal/accuracy_reward/group_bin_occupancy": 0.203515625,
"signal/accuracy_reward/group_std_mean": 0.2256518006324768,
"signal/accuracy_reward/group_zero_std_frac": 0.371875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0870849609375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0870849609375,
"signal/advantage_abs_mean": 0.10101482570171356,
"signal/advantage_pre_scale_abs_mean": 0.10101482570171356,
"signal/advantage_pre_scale_std": 0.1429665595293045,
"signal/advantage_std": 0.1429665595293045,
"signal/brier_reward/centered_abs_mean": 0.19600196480751036,
"signal/brier_reward/group_bin_occupancy": 0.88515625,
"signal/brier_reward/group_std_mean": 0.24760811030864716,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024500245600938796,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024500245600938796,
"signal/format_reward/centered_abs_mean": 0.00906982421875,
"signal/format_reward/group_bin_occupancy": 0.143359375,
"signal/format_reward/group_std_mean": 0.026180195435881615,
"signal/format_reward/group_zero_std_frac": 0.853125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004534912109375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.004534912109375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0013354318216443062,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.773828125,
"signal/frontier_aurc_reward/group_std_mean": 0.0020624040393158794,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 2.3904228874016555e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 2.3904228874016555e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.3095270454883575,
"signal/frontier_coverage_1/group_bin_occupancy": 0.940234375,
"signal/frontier_coverage_1/group_std_mean": 0.3819129645824432,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_10/centered_abs_mean": 0.3095270454883575,
"signal/frontier_coverage_10/group_bin_occupancy": 0.940234375,
"signal/frontier_coverage_10/group_std_mean": 0.3819129645824432,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_15/centered_abs_mean": 0.3095270454883575,
"signal/frontier_coverage_15/group_bin_occupancy": 0.940234375,
"signal/frontier_coverage_15/group_std_mean": 0.3819129645824432,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_20/centered_abs_mean": 0.3095270454883575,
"signal/frontier_coverage_20/group_bin_occupancy": 0.940234375,
"signal/frontier_coverage_20/group_std_mean": 0.3819129645824432,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_25/centered_abs_mean": 0.3095270454883575,
"signal/frontier_coverage_25/group_bin_occupancy": 0.940234375,
"signal/frontier_coverage_25/group_std_mean": 0.3819129645824432,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_5/centered_abs_mean": 0.3095270454883575,
"signal/frontier_coverage_5/group_bin_occupancy": 0.940234375,
"signal/frontier_coverage_5/group_std_mean": 0.3819129645824432,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005540534015744924,
"signal/frontier_ece_reward/centered_abs_mean": 0.048538880050182344,
"signal/frontier_ece_reward/group_bin_occupancy": 0.7890625,
"signal/frontier_ece_reward/group_std_mean": 0.07725905627012253,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006067360006272793,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006067360006272793,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4226137399673462,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.77734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4835656762123108,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.052826717495918274,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.052826717495918274,
"step": 40
},
{
"calibration/aurc": 0.4476721577377777,
"calibration/batch_distribution_entropy": 0.9775319646696126,
"calibration/batch_entropy_100bins": 0.965599725401858,
"calibration/batch_entropy_10bins": 0.9775319646696126,
"calibration/batch_entropy_50bins": 0.9741778975790041,
"calibration/batch_uniqueness": 0.9526775737262507,
"calibration/buffer_distribution_entropy": 0.9258372964444268,
"calibration/buffer_entropy_100bins": 0.8048134162077932,
"calibration/buffer_entropy_10bins": 0.9258372964444268,
"calibration/buffer_entropy_50bins": 0.8588073303858943,
"calibration/confidence_entropy": 0.5410896147965197,
"calibration/coverage@0%": 0.004303763885691263,
"calibration/coverage@1%": 0.004303763885691263,
"calibration/coverage@10%": 0.004303763885691263,
"calibration/coverage@15%": 0.010163138885691262,
"calibration/coverage@20%": 0.01758654275066191,
"calibration/coverage@25%": 0.12735369961340698,
"calibration/coverage@30%": 0.20470204520164229,
"calibration/coverage@5%": 0.004303763885691263,
"calibration/ece": 0.21352444508406468,
"calibration/mean_confidence": 0.48576702979342884,
"calibration/prompt_uniqueness": 0.8955215311199272,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00244140625,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 825.0,
"completions/mean_length": 122.3205078125,
"completions/mean_terminated_length": 118.86110992431641,
"completions/min_length": 46.8,
"completions/min_terminated_length": 46.8,
"epoch": 0.144,
"grad_norm": 0.001746510504744947,
"learning_rate": 1e-06,
"loss": 0.0087,
"num_tokens": 150106561.0,
"reward": 0.8010232448577881,
"reward_std": 0.1486440747976303,
"rewards/accuracy_reward": 0.4798828125,
"rewards/brier_reward": 0.7014939427375794,
"rewards/format_reward": 0.9962890625,
"rewards/frontier_aurc_reward": -0.004155356530100107,
"rewards/frontier_coverage_1": 0.03323503416031599,
"rewards/frontier_coverage_10": 0.03323503416031599,
"rewards/frontier_coverage_15": 0.03323503416031599,
"rewards/frontier_coverage_20": 0.03323503416031599,
"rewards/frontier_coverage_25": 0.03323503416031599,
"rewards/frontier_coverage_5": 0.03323503416031599,
"rewards/frontier_ece_reward": 0.0009955904446542264,
"rewards/frontier_entropy_batch_reward": -0.22695176005363465,
"signal/accuracy_reward/centered_abs_mean": 0.17799072265625,
"signal/accuracy_reward/group_bin_occupancy": 0.204296875,
"signal/accuracy_reward/group_std_mean": 0.22953784465789795,
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.088995361328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.088995361328125,
"signal/advantage_abs_mean": 0.11683495044708252,
"signal/advantage_pre_scale_abs_mean": 0.11683495044708252,
"signal/advantage_pre_scale_std": 0.1583361119031906,
"signal/advantage_std": 0.1583361119031906,
"signal/brier_reward/centered_abs_mean": 0.2091756820678711,
"signal/brier_reward/group_bin_occupancy": 0.9421875,
"signal/brier_reward/group_std_mean": 0.2564647078514099,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026146960258483887,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.026146960258483887,
"signal/format_reward/centered_abs_mean": 0.00714111328125,
"signal/format_reward/group_bin_occupancy": 0.13828125,
"signal/format_reward/group_std_mean": 0.01964699849486351,
"signal/format_reward/group_zero_std_frac": 0.89375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003570556640625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.003570556640625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002061827527359128,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.815234375,
"signal/frontier_aurc_reward/group_std_mean": 0.0029503189492970706,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.6906712193740535e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.6906712193740535e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2580311119556427,
"signal/frontier_coverage_1/group_bin_occupancy": 0.945703125,
"signal/frontier_coverage_1/group_std_mean": 0.32445969581604006,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_10/centered_abs_mean": 0.2580311119556427,
"signal/frontier_coverage_10/group_bin_occupancy": 0.945703125,
"signal/frontier_coverage_10/group_std_mean": 0.32445969581604006,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_15/centered_abs_mean": 0.2580311119556427,
"signal/frontier_coverage_15/group_bin_occupancy": 0.945703125,
"signal/frontier_coverage_15/group_std_mean": 0.32445969581604006,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_20/centered_abs_mean": 0.2580311119556427,
"signal/frontier_coverage_20/group_bin_occupancy": 0.945703125,
"signal/frontier_coverage_20/group_std_mean": 0.32445969581604006,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_25/centered_abs_mean": 0.2580311119556427,
"signal/frontier_coverage_25/group_bin_occupancy": 0.945703125,
"signal/frontier_coverage_25/group_std_mean": 0.32445969581604006,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_5/centered_abs_mean": 0.2580311119556427,
"signal/frontier_coverage_5/group_bin_occupancy": 0.945703125,
"signal/frontier_coverage_5/group_std_mean": 0.32445969581604006,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004618756845593453,
"signal/frontier_ece_reward/centered_abs_mean": 0.06224460154771805,
"signal/frontier_ece_reward/group_bin_occupancy": 0.85703125,
"signal/frontier_ece_reward/group_std_mean": 0.0892532080411911,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007780575193464756,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007780575193464756,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31719207763671875,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.39413705468177795,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.039649009704589844,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.039649009704589844,
"step": 45
},
{
"calibration/aurc": 0.5193675277297729,
"calibration/batch_distribution_entropy": 0.983820122323354,
"calibration/batch_entropy_100bins": 0.9716850235861294,
"calibration/batch_entropy_10bins": 0.983820122323354,
"calibration/batch_entropy_50bins": 0.9798083767768972,
"calibration/batch_uniqueness": 0.9539231310533063,
"calibration/buffer_distribution_entropy": 0.9415060100334243,
"calibration/buffer_entropy_100bins": 0.8440598741793546,
"calibration/buffer_entropy_10bins": 0.9415060100334243,
"calibration/buffer_entropy_50bins": 0.888877610865622,
"calibration/confidence_entropy": 0.5284500641790459,
"calibration/coverage@0%": 0.002352179777096941,
"calibration/coverage@1%": 0.002352179777096941,
"calibration/coverage@10%": 0.002352179777096941,
"calibration/coverage@15%": 0.002352179777096941,
"calibration/coverage@20%": 0.003134958642067587,
"calibration/coverage@25%": 0.004700516372008879,
"calibration/coverage@30%": 0.005483295236979524,
"calibration/coverage@5%": 0.002352179777096941,
"calibration/ece": 0.21469450707488327,
"calibration/mean_confidence": 0.5467065595076134,
"calibration/prompt_uniqueness": 0.8933109723215835,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00185546875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 966.0,
"completions/mean_length": 125.73544921875,
"completions/mean_terminated_length": 123.11231536865235,
"completions/min_length": 50.6,
"completions/min_terminated_length": 50.6,
"epoch": 0.16,
"grad_norm": 0.0022161127999424934,
"learning_rate": 1e-06,
"loss": 0.0047,
"num_tokens": 166415020.0,
"reward": 0.7875006437301636,
"reward_std": 0.15233681201934815,
"rewards/accuracy_reward": 0.44111328125,
"rewards/brier_reward": 0.6943708896636963,
"rewards/format_reward": 0.9970703125,
"rewards/frontier_aurc_reward": -0.004590437188744545,
"rewards/frontier_coverage_1": 0.05425913706421852,
"rewards/frontier_coverage_10": 0.05425913706421852,
"rewards/frontier_coverage_15": 0.05425913706421852,
"rewards/frontier_coverage_20": 0.05425913706421852,
"rewards/frontier_coverage_25": 0.05425913706421852,
"rewards/frontier_coverage_5": 0.05425913706421852,
"rewards/frontier_ece_reward": -0.0013219955493696035,
"rewards/frontier_entropy_batch_reward": -0.19174024760723113,
"signal/accuracy_reward/centered_abs_mean": 0.169244384765625,
"signal/accuracy_reward/group_bin_occupancy": 0.196484375,
"signal/accuracy_reward/group_std_mean": 0.2129174590110779,
"signal/accuracy_reward/group_zero_std_frac": 0.428125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0846221923828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0846221923828125,
"signal/advantage_abs_mean": 0.122350013256073,
"signal/advantage_pre_scale_abs_mean": 0.122350013256073,
"signal/advantage_pre_scale_std": 0.16442006826400757,
"signal/advantage_std": 0.16442006826400757,
"signal/brier_reward/centered_abs_mean": 0.21847104728221894,
"signal/brier_reward/group_bin_occupancy": 0.945703125,
"signal/brier_reward/group_std_mean": 0.26607994437217714,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027308880910277368,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.027308880910277368,
"signal/format_reward/centered_abs_mean": 0.00565185546875,
"signal/format_reward/group_bin_occupancy": 0.1359375,
"signal/format_reward/group_std_mean": 0.01590019799768925,
"signal/format_reward/group_zero_std_frac": 0.9125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002825927734375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.002825927734375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002719328412786126,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.8203125,
"signal/frontier_aurc_reward/group_std_mean": 0.0038117329590022565,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.867597672273405e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.867597672273405e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22449846267700196,
"signal/frontier_coverage_1/group_bin_occupancy": 0.924609375,
"signal/frontier_coverage_1/group_std_mean": 0.29227436184883115,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_10/centered_abs_mean": 0.22449846267700196,
"signal/frontier_coverage_10/group_bin_occupancy": 0.924609375,
"signal/frontier_coverage_10/group_std_mean": 0.29227436184883115,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_15/centered_abs_mean": 0.22449846267700196,
"signal/frontier_coverage_15/group_bin_occupancy": 0.924609375,
"signal/frontier_coverage_15/group_std_mean": 0.29227436184883115,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_20/centered_abs_mean": 0.22449846267700196,
"signal/frontier_coverage_20/group_bin_occupancy": 0.924609375,
"signal/frontier_coverage_20/group_std_mean": 0.29227436184883115,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_25/centered_abs_mean": 0.22449846267700196,
"signal/frontier_coverage_25/group_bin_occupancy": 0.924609375,
"signal/frontier_coverage_25/group_std_mean": 0.29227436184883115,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_5/centered_abs_mean": 0.22449846267700196,
"signal/frontier_coverage_5/group_bin_occupancy": 0.924609375,
"signal/frontier_coverage_5/group_std_mean": 0.29227436184883115,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004018522240221501,
"signal/frontier_ece_reward/centered_abs_mean": 0.07301094681024552,
"signal/frontier_ece_reward/group_bin_occupancy": 0.905078125,
"signal/frontier_ece_reward/group_std_mean": 0.09825690239667892,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00912636835128069,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00912636835128069,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2845084547996521,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.758203125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3657692790031433,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03556355684995651,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03556355684995651,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.6180015396742866,
"eval_calibration/batch_distribution_entropy": 0.9232840224430083,
"eval_calibration/batch_entropy_100bins": 0.719649833384205,
"eval_calibration/batch_entropy_10bins": 0.9232840224430083,
"eval_calibration/batch_entropy_50bins": 0.8035427782506473,
"eval_calibration/batch_uniqueness": 0.9052734375,
"eval_calibration/buffer_distribution_entropy": 0.9484987743799115,
"eval_calibration/buffer_entropy_100bins": 0.8625570001373941,
"eval_calibration/buffer_entropy_10bins": 0.9484987743799115,
"eval_calibration/buffer_entropy_50bins": 0.9029135065796394,
"eval_calibration/confidence_entropy": 0.5333216449567841,
"eval_calibration/coverage@0%": 0.0,
"eval_calibration/coverage@1%": 0.0,
"eval_calibration/coverage@10%": 0.0,
"eval_calibration/coverage@15%": 0.0,
"eval_calibration/coverage@20%": 0.046875,
"eval_calibration/coverage@25%": 0.046875,
"eval_calibration/coverage@30%": 0.140625,
"eval_calibration/coverage@5%": 0.0,
"eval_calibration/ece": 0.330758367790426,
"eval_calibration/mean_confidence": 0.5541032170362717,
"eval_calibration/prompt_uniqueness": 0.9052734375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 409.75,
"eval_completions/max_terminated_length": 409.75,
"eval_completions/mean_length": 132.97144317626953,
"eval_completions/mean_terminated_length": 132.97144317626953,
"eval_completions/min_length": 66.0,
"eval_completions/min_terminated_length": 66.0,
"eval_loss": 0.0,
"eval_num_tokens": 166415020.0,
"eval_reward": 0.6569966375827789,
"eval_reward_std": 0.23399890586733818,
"eval_rewards/accuracy_reward": 0.365234375,
"eval_rewards/brier_reward": 0.703216090798378,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.004859182401560247,
"eval_rewards/frontier_coverage_1": 0.10959587432444096,
"eval_rewards/frontier_coverage_10": 0.10959587432444096,
"eval_rewards/frontier_coverage_15": 0.10959587432444096,
"eval_rewards/frontier_coverage_20": 0.10959587432444096,
"eval_rewards/frontier_coverage_25": 0.10959587432444096,
"eval_rewards/frontier_coverage_5": 0.10959587432444096,
"eval_rewards/frontier_ece_reward": -0.001649250101763755,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 20.2387,
"eval_samples_per_second": 24.705,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4493408203125,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.48055653274059296,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.22467041015625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22467041015625,
"eval_signal/advantage_abs_mean": 0.20993919670581818,
"eval_signal/advantage_pre_scale_abs_mean": 0.20993919670581818,
"eval_signal/advantage_pre_scale_std": 0.23163216933608055,
"eval_signal/advantage_std": 0.23163216933608055,
"eval_signal/brier_reward/centered_abs_mean": 0.21175387874245644,
"eval_signal/brier_reward/group_bin_occupancy": 0.96875,
"eval_signal/brier_reward/group_std_mean": 0.2560478299856186,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.026469234842807055,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.026469234842807055,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.003475597535725683,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.921875,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.004670257214456797,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.22131901764078e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.22131901764078e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.28018152713775635,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_1/group_std_mean": 0.36891133338212967,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.28018152713775635,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_10/group_std_mean": 0.36891133338212967,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.28018152713775635,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_15/group_std_mean": 0.36891133338212967,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.28018152713775635,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_20/group_std_mean": 0.36891133338212967,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.28018152713775635,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_25/group_std_mean": 0.36891133338212967,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.28018152713775635,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_5/group_std_mean": 0.36891133338212967,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005015249014832079,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.0727236233651638,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.10696529969573021,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.009090452920645475,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.009090452920645475,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.198,
"step": 50
},
{
"calibration/aurc": 0.49154793221632376,
"calibration/batch_distribution_entropy": 0.9923982265099717,
"calibration/batch_entropy_100bins": 0.976761110553309,
"calibration/batch_entropy_10bins": 0.9923982265099717,
"calibration/batch_entropy_50bins": 0.9853177330835299,
"calibration/batch_uniqueness": 0.9559410835975026,
"calibration/buffer_distribution_entropy": 0.9529318045546697,
"calibration/buffer_entropy_100bins": 0.8728419282733055,
"calibration/buffer_entropy_10bins": 0.9529318045546697,
"calibration/buffer_entropy_50bins": 0.9107275009636199,
"calibration/confidence_entropy": 0.5137282393272424,
"calibration/coverage@0%": 0.003908543297455968,
"calibration/coverage@1%": 0.003908543297455968,
"calibration/coverage@10%": 0.003908543297455968,
"calibration/coverage@15%": 0.0050804182974559685,
"calibration/coverage@20%": 0.009769447162426614,
"calibration/coverage@25%": 0.010550697162426615,
"calibration/coverage@30%": 0.021499663649706457,
"calibration/coverage@5%": 0.003908543297455968,
"calibration/ece": 0.21824396695551201,
"calibration/mean_confidence": 0.4844792598613541,
"calibration/prompt_uniqueness": 0.8977543635373308,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.001171875,
"completions/max_length": 1536.0,
"completions/max_terminated_length": 887.8,
"completions/mean_length": 135.89765625,
"completions/mean_terminated_length": 134.25655059814454,
"completions/min_length": 58.0,
"completions/min_terminated_length": 58.0,
"epoch": 0.176,
"grad_norm": 0.001891042571514845,
"learning_rate": 1e-06,
"loss": 0.003,
"num_tokens": 183043732.0,
"reward": 0.7944678544998169,
"reward_std": 0.1392228126525879,
"rewards/accuracy_reward": 0.44111328125,
"rewards/brier_reward": 0.6948043942451477,
"rewards/format_reward": 0.9982421875,
"rewards/frontier_aurc_reward": -0.004382272064685821,
"rewards/frontier_coverage_1": 0.06402078047394752,
"rewards/frontier_coverage_10": 0.06402078047394752,
"rewards/frontier_coverage_15": 0.06402078047394752,
"rewards/frontier_coverage_20": 0.06402078047394752,
"rewards/frontier_coverage_25": 0.06402078047394752,
"rewards/frontier_coverage_5": 0.06402078047394752,
"rewards/frontier_ece_reward": 0.0002359504927881062,
"rewards/frontier_entropy_batch_reward": -0.15109863579273225,
"signal/accuracy_reward/centered_abs_mean": 0.155535888671875,
"signal/accuracy_reward/group_bin_occupancy": 0.19375,
"signal/accuracy_reward/group_std_mean": 0.20017340481281282,
"signal/accuracy_reward/group_zero_std_frac": 0.45,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0777679443359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0777679443359375,
"signal/advantage_abs_mean": 0.11083731651306153,
"signal/advantage_pre_scale_abs_mean": 0.11083731651306153,
"signal/advantage_pre_scale_std": 0.14989208579063415,
"signal/advantage_std": 0.14989208579063415,
"signal/brier_reward/centered_abs_mean": 0.22810422778129577,
"signal/brier_reward/group_bin_occupancy": 0.94296875,
"signal/brier_reward/group_std_mean": 0.27661994099617004,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02851302847266197,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02851302847266197,
"signal/format_reward/centered_abs_mean": 0.0033935546875,
"signal/format_reward/group_bin_occupancy": 0.131640625,
"signal/format_reward/group_std_mean": 0.009607380395755172,
"signal/format_reward/group_zero_std_frac": 0.946875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00169677734375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00169677734375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026019237469881774,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.793359375,
"signal/frontier_aurc_reward/group_std_mean": 0.0037212247960269453,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.657443350879476e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.657443350879476e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.26558775305747984,
"signal/frontier_coverage_1/group_bin_occupancy": 0.934375,
"signal/frontier_coverage_1/group_std_mean": 0.3330970585346222,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_10/centered_abs_mean": 0.26558775305747984,
"signal/frontier_coverage_10/group_bin_occupancy": 0.934375,
"signal/frontier_coverage_10/group_std_mean": 0.3330970585346222,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_15/centered_abs_mean": 0.26558775305747984,
"signal/frontier_coverage_15/group_bin_occupancy": 0.934375,
"signal/frontier_coverage_15/group_std_mean": 0.3330970585346222,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_20/centered_abs_mean": 0.26558775305747984,
"signal/frontier_coverage_20/group_bin_occupancy": 0.934375,
"signal/frontier_coverage_20/group_std_mean": 0.3330970585346222,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_25/centered_abs_mean": 0.26558775305747984,
"signal/frontier_coverage_25/group_bin_occupancy": 0.934375,
"signal/frontier_coverage_25/group_std_mean": 0.3330970585346222,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_5/centered_abs_mean": 0.26558775305747984,
"signal/frontier_coverage_5/group_bin_occupancy": 0.934375,
"signal/frontier_coverage_5/group_std_mean": 0.3330970585346222,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004754020553082228,
"signal/frontier_ece_reward/centered_abs_mean": 0.06770721971988677,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89609375,
"signal/frontier_ece_reward/group_std_mean": 0.09261107891798019,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.008463402464985847,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.008463402464985847,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2386895924806595,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.756640625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32064216732978823,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.029836199060082436,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029836199060082436,
"step": 55
},
{
"calibration/aurc": 0.4491234760798785,
"calibration/batch_distribution_entropy": 0.98299611256218,
"calibration/batch_entropy_100bins": 0.9689459410888717,
"calibration/batch_entropy_10bins": 0.98299611256218,
"calibration/batch_entropy_50bins": 0.9776301185799596,
"calibration/batch_uniqueness": 0.9538074206065245,
"calibration/buffer_distribution_entropy": 0.9623449924153729,
"calibration/buffer_entropy_100bins": 0.8922897436238332,
"calibration/buffer_entropy_10bins": 0.9623449924153729,
"calibration/buffer_entropy_50bins": 0.9257487085635707,
"calibration/confidence_entropy": 0.49867141502553186,
"calibration/coverage@0%": 0.005078125,
"calibration/coverage@1%": 0.005078125,
"calibration/coverage@10%": 0.005859375,
"calibration/coverage@15%": 0.009765625,
"calibration/coverage@20%": 0.0125,
"calibration/coverage@25%": 0.021875,
"calibration/coverage@30%": 0.0421875,
"calibration/coverage@5%": 0.005078125,
"calibration/ece": 0.18586777873207352,
"calibration/mean_confidence": 0.4387145557850641,
"calibration/prompt_uniqueness": 0.891550752796566,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 994.2,
"completions/max_terminated_length": 709.2,
"completions/mean_length": 144.47421875,
"completions/mean_terminated_length": 143.9313934326172,
"completions/min_length": 58.0,
"completions/min_terminated_length": 58.0,
"epoch": 0.192,
"grad_norm": 0.0014471631729975343,
"learning_rate": 1e-06,
"loss": 0.0015,
"num_tokens": 199337964.0,
"reward": 0.8112979173660279,
"reward_std": 0.1323389947414398,
"rewards/accuracy_reward": 0.47412109375,
"rewards/brier_reward": 0.7108014822006226,
"rewards/format_reward": 0.99892578125,
"rewards/frontier_aurc_reward": -0.003819770412519574,
"rewards/frontier_coverage_1": 0.06823012800887227,
"rewards/frontier_coverage_10": 0.06823012800887227,
"rewards/frontier_coverage_15": 0.06823012800887227,
"rewards/frontier_coverage_20": 0.06823012800887227,
"rewards/frontier_coverage_25": 0.06823012800887227,
"rewards/frontier_coverage_5": 0.06823012800887227,
"rewards/frontier_ece_reward": 0.008699505100958049,
"rewards/frontier_entropy_batch_reward": -0.17938159108161927,
"signal/accuracy_reward/centered_abs_mean": 0.148931884765625,
"signal/accuracy_reward/group_bin_occupancy": 0.193359375,
"signal/accuracy_reward/group_std_mean": 0.19490036964416504,
"signal/accuracy_reward/group_zero_std_frac": 0.453125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0744659423828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0744659423828125,
"signal/advantage_abs_mean": 0.10477420687675476,
"signal/advantage_pre_scale_abs_mean": 0.10477420687675476,
"signal/advantage_pre_scale_std": 0.14257683753967285,
"signal/advantage_std": 0.14257683753967285,
"signal/brier_reward/centered_abs_mean": 0.22218222618103028,
"signal/brier_reward/group_bin_occupancy": 0.919140625,
"signal/brier_reward/group_std_mean": 0.27116515636444094,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.027772778272628786,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.027772778272628786,
"signal/format_reward/centered_abs_mean": 0.002081298828125,
"signal/format_reward/group_bin_occupancy": 0.129296875,
"signal/format_reward/group_std_mean": 0.006076698703691363,
"signal/format_reward/group_zero_std_frac": 0.965625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002302809851244092,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7859375,
"signal/frontier_aurc_reward/group_std_mean": 0.003347306279465556,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.122029495192692e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.122029495192692e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2844557523727417,
"signal/frontier_coverage_1/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_1/group_std_mean": 0.3530541956424713,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_10/centered_abs_mean": 0.2844557523727417,
"signal/frontier_coverage_10/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_10/group_std_mean": 0.3530541956424713,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_15/centered_abs_mean": 0.2844557523727417,
"signal/frontier_coverage_15/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_15/group_std_mean": 0.3530541956424713,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_20/centered_abs_mean": 0.2844557523727417,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_20/group_std_mean": 0.3530541956424713,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_25/centered_abs_mean": 0.2844557523727417,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_25/group_std_mean": 0.3530541956424713,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_5/centered_abs_mean": 0.2844557523727417,
"signal/frontier_coverage_5/group_bin_occupancy": 0.9390625,
"signal/frontier_coverage_5/group_std_mean": 0.3530541956424713,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005091757886111737,
"signal/frontier_ece_reward/centered_abs_mean": 0.061374531686306,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8796875,
"signal/frontier_ece_reward/group_std_mean": 0.08481642305850982,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.00767181646078825,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.00767181646078825,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27003463804721833,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75390625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3499366283416748,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03375432975590229,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03375432975590229,
"step": 60
},
{
"calibration/aurc": 0.38886185437113174,
"calibration/batch_distribution_entropy": 0.993174546110124,
"calibration/batch_entropy_100bins": 0.9755285743495123,
"calibration/batch_entropy_10bins": 0.993174546110124,
"calibration/batch_entropy_50bins": 0.9853249378435794,
"calibration/batch_uniqueness": 0.9561890088248373,
"calibration/buffer_distribution_entropy": 0.9696478610556747,
"calibration/buffer_entropy_100bins": 0.9073566670205515,
"calibration/buffer_entropy_10bins": 0.9696478610556747,
"calibration/buffer_entropy_50bins": 0.9373040055132771,
"calibration/confidence_entropy": 0.4993493040570547,
"calibration/coverage@0%": 0.001954656862745098,
"calibration/coverage@1%": 0.001954656862745098,
"calibration/coverage@10%": 0.01878440245961398,
"calibration/coverage@15%": 0.08023254335980967,
"calibration/coverage@20%": 0.12720311240934729,
"calibration/coverage@25%": 0.3667257827788649,
"calibration/coverage@30%": 0.413671875,
"calibration/coverage@5%": 0.001954656862745098,
"calibration/ece": 0.23965065800221003,
"calibration/mean_confidence": 0.4801674712232713,
"calibration/prompt_uniqueness": 0.8904477757544225,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 1357.2,
"completions/max_terminated_length": 678.2,
"completions/mean_length": 154.43896484375,
"completions/mean_terminated_length": 153.49268188476563,
"completions/min_length": 62.0,
"completions/min_terminated_length": 62.0,
"epoch": 0.208,
"grad_norm": 0.0015919266734272242,
"learning_rate": 1e-06,
"loss": 0.0019,
"num_tokens": 215951643.0,
"reward": 0.8326294660568238,
"reward_std": 0.12973762750625611,
"rewards/accuracy_reward": 0.519140625,
"rewards/brier_reward": 0.7076422810554505,
"rewards/format_reward": 0.9990234375,
"rewards/frontier_aurc_reward": -0.003565392177551985,
"rewards/frontier_coverage_1": 0.030470560118556023,
"rewards/frontier_coverage_10": 0.030470560118556023,
"rewards/frontier_coverage_15": 0.030470560118556023,
"rewards/frontier_coverage_20": 0.030470560118556023,
"rewards/frontier_coverage_25": 0.030470560118556023,
"rewards/frontier_coverage_5": 0.030470560118556023,
"rewards/frontier_ece_reward": 0.01095831673592329,
"rewards/frontier_entropy_batch_reward": -0.15589092671871185,
"signal/accuracy_reward/centered_abs_mean": 0.142822265625,
"signal/accuracy_reward/group_bin_occupancy": 0.19453125,
"signal/accuracy_reward/group_std_mean": 0.19114138782024384,
"signal/accuracy_reward/group_zero_std_frac": 0.44375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0714111328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0714111328125,
"signal/advantage_abs_mean": 0.10268731862306595,
"signal/advantage_pre_scale_abs_mean": 0.10268731862306595,
"signal/advantage_pre_scale_std": 0.1399999141693115,
"signal/advantage_std": 0.1399999141693115,
"signal/brier_reward/centered_abs_mean": 0.22231624722480775,
"signal/brier_reward/group_bin_occupancy": 0.922265625,
"signal/brier_reward/group_std_mean": 0.2717791020870209,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02778953090310097,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02778953090310097,
"signal/format_reward/centered_abs_mean": 0.00189208984375,
"signal/format_reward/group_bin_occupancy": 0.12890625,
"signal/format_reward/group_std_mean": 0.005524271540343762,
"signal/format_reward/group_zero_std_frac": 0.96875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000946044921875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002406009705737233,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7859375,
"signal/frontier_aurc_reward/group_std_mean": 0.0035011733416467905,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.3067572551080954e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.3067572551080954e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.2699986696243286,
"signal/frontier_coverage_1/group_bin_occupancy": 0.919921875,
"signal/frontier_coverage_1/group_std_mean": 0.3401759326457977,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_10/centered_abs_mean": 0.2699986696243286,
"signal/frontier_coverage_10/group_bin_occupancy": 0.919921875,
"signal/frontier_coverage_10/group_std_mean": 0.3401759326457977,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_15/centered_abs_mean": 0.2699986696243286,
"signal/frontier_coverage_15/group_bin_occupancy": 0.919921875,
"signal/frontier_coverage_15/group_std_mean": 0.3401759326457977,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_20/centered_abs_mean": 0.2699986696243286,
"signal/frontier_coverage_20/group_bin_occupancy": 0.919921875,
"signal/frontier_coverage_20/group_std_mean": 0.3401759326457977,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_25/centered_abs_mean": 0.2699986696243286,
"signal/frontier_coverage_25/group_bin_occupancy": 0.919921875,
"signal/frontier_coverage_25/group_std_mean": 0.3401759326457977,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_5/centered_abs_mean": 0.2699986696243286,
"signal/frontier_coverage_5/group_bin_occupancy": 0.919921875,
"signal/frontier_coverage_5/group_std_mean": 0.3401759326457977,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004832976032048464,
"signal/frontier_ece_reward/centered_abs_mean": 0.06127958670258522,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8984375,
"signal/frontier_ece_reward/group_std_mean": 0.08330589979887008,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007659948337823153,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007659948337823153,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24359245598316193,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.75625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3275866687297821,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03044905699789524,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03044905699789524,
"step": 65
},
{
"calibration/aurc": 0.37149284629454293,
"calibration/batch_distribution_entropy": 0.9931569295884003,
"calibration/batch_entropy_100bins": 0.9783278813046771,
"calibration/batch_entropy_10bins": 0.9931569295884003,
"calibration/batch_entropy_50bins": 0.9868018130911993,
"calibration/batch_uniqueness": 0.9561319831691417,
"calibration/buffer_distribution_entropy": 0.9742613412980017,
"calibration/buffer_entropy_100bins": 0.9193829808162708,
"calibration/buffer_entropy_10bins": 0.9742613412980017,
"calibration/buffer_entropy_50bins": 0.946153440671097,
"calibration/confidence_entropy": 0.5044726683622951,
"calibration/coverage@0%": 0.009770982022946165,
"calibration/coverage@1%": 0.009770982022946165,
"calibration/coverage@10%": 0.026184111915314067,
"calibration/coverage@15%": 0.043773703402593914,
"calibration/coverage@20%": 0.12471181494858216,
"calibration/coverage@25%": 0.21463975444629907,
"calibration/coverage@30%": 0.34204774975058516,
"calibration/coverage@5%": 0.011333482022946165,
"calibration/ece": 0.17022807751994243,
"calibration/mean_confidence": 0.5015854712077894,
"calibration/prompt_uniqueness": 0.8873234306350446,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1016.2,
"completions/max_terminated_length": 697.0,
"completions/mean_length": 160.7646484375,
"completions/mean_terminated_length": 160.22604675292968,
"completions/min_length": 67.8,
"completions/min_terminated_length": 67.8,
"epoch": 0.224,
"grad_norm": 0.001383577473461628,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 232751057.0,
"reward": 0.8240685939788819,
"reward_std": 0.12576959878206254,
"rewards/accuracy_reward": 0.480078125,
"rewards/brier_reward": 0.7404986023902893,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0036113801877945663,
"rewards/frontier_coverage_1": 0.08505139946937561,
"rewards/frontier_coverage_10": 0.08505139946937561,
"rewards/frontier_coverage_15": 0.08505139946937561,
"rewards/frontier_coverage_20": 0.08505139946937561,
"rewards/frontier_coverage_25": 0.08505139946937561,
"rewards/frontier_coverage_5": 0.08505139946937561,
"rewards/frontier_ece_reward": 0.015613408572971822,
"rewards/frontier_entropy_batch_reward": -0.15448164641857148,
"signal/accuracy_reward/centered_abs_mean": 0.13514404296875,
"signal/accuracy_reward/group_bin_occupancy": 0.187890625,
"signal/accuracy_reward/group_std_mean": 0.177346870303154,
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.067572021484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.067572021484375,
"signal/advantage_abs_mean": 0.09964470565319061,
"signal/advantage_pre_scale_abs_mean": 0.09964470565319061,
"signal/advantage_pre_scale_std": 0.13848601579666137,
"signal/advantage_std": 0.13848601579666137,
"signal/brier_reward/centered_abs_mean": 0.19449081718921662,
"signal/brier_reward/group_bin_occupancy": 0.896875,
"signal/brier_reward/group_std_mean": 0.24308900237083436,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.024311352148652078,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.024311352148652078,
"signal/format_reward/centered_abs_mean": 0.000933837890625,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0024258273653686045,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004669189453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004669189453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002517683617770672,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7875,
"signal/frontier_aurc_reward/group_std_mean": 0.0036455394700169565,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.506653422140516e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.506653422140516e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.23284452259540558,
"signal/frontier_coverage_1/group_bin_occupancy": 0.909765625,
"signal/frontier_coverage_1/group_std_mean": 0.29805226922035216,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_10/centered_abs_mean": 0.23284452259540558,
"signal/frontier_coverage_10/group_bin_occupancy": 0.909765625,
"signal/frontier_coverage_10/group_std_mean": 0.29805226922035216,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_15/centered_abs_mean": 0.23284452259540558,
"signal/frontier_coverage_15/group_bin_occupancy": 0.909765625,
"signal/frontier_coverage_15/group_std_mean": 0.29805226922035216,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_20/centered_abs_mean": 0.23284452259540558,
"signal/frontier_coverage_20/group_bin_occupancy": 0.909765625,
"signal/frontier_coverage_20/group_std_mean": 0.29805226922035216,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_25/centered_abs_mean": 0.23284452259540558,
"signal/frontier_coverage_25/group_bin_occupancy": 0.909765625,
"signal/frontier_coverage_25/group_std_mean": 0.29805226922035216,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_5/centered_abs_mean": 0.23284452259540558,
"signal/frontier_coverage_5/group_bin_occupancy": 0.909765625,
"signal/frontier_coverage_5/group_std_mean": 0.29805226922035216,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041679169051349165,
"signal/frontier_ece_reward/centered_abs_mean": 0.0579533688724041,
"signal/frontier_ece_reward/group_bin_occupancy": 0.900390625,
"signal/frontier_ece_reward/group_std_mean": 0.07829077690839767,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.007244171109050512,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.007244171109050512,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24184339344501496,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.758984375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3229557752609253,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03023042418062687,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03023042418062687,
"step": 70
},
{
"calibration/aurc": 0.3843023569473395,
"calibration/batch_distribution_entropy": 0.9825564335986041,
"calibration/batch_entropy_100bins": 0.9674868151684599,
"calibration/batch_entropy_10bins": 0.9825564335986041,
"calibration/batch_entropy_50bins": 0.9772102320227738,
"calibration/batch_uniqueness": 0.9530455243124984,
"calibration/buffer_distribution_entropy": 0.9776003008409667,
"calibration/buffer_entropy_100bins": 0.9292828890688549,
"calibration/buffer_entropy_10bins": 0.9776003008409667,
"calibration/buffer_entropy_50bins": 0.9531789644661673,
"calibration/confidence_entropy": 0.5097281333912452,
"calibration/coverage@0%": 0.01525272137964775,
"calibration/coverage@1%": 0.01525272137964775,
"calibration/coverage@10%": 0.054781525195694715,
"calibration/coverage@15%": 0.10409047822896281,
"calibration/coverage@20%": 0.1984008072407045,
"calibration/coverage@25%": 0.242578125,
"calibration/coverage@30%": 0.265234375,
"calibration/coverage@5%": 0.017209668542074362,
"calibration/ece": 0.17762837478013666,
"calibration/mean_confidence": 0.5141673460767282,
"calibration/prompt_uniqueness": 0.8842985822060353,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 882.2,
"completions/max_terminated_length": 478.2,
"completions/mean_length": 172.72275390625,
"completions/mean_terminated_length": 172.45663757324218,
"completions/min_length": 72.0,
"completions/min_terminated_length": 72.0,
"epoch": 0.24,
"grad_norm": 0.0014637865824624896,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 249771418.0,
"reward": 0.8475449323654175,
"reward_std": 0.126870197057724,
"rewards/accuracy_reward": 0.54345703125,
"rewards/brier_reward": 0.7408711910247803,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0031028116587549447,
"rewards/frontier_coverage_1": 0.043854419514536856,
"rewards/frontier_coverage_10": 0.043854419514536856,
"rewards/frontier_coverage_15": 0.043854419514536856,
"rewards/frontier_coverage_20": 0.043854419514536856,
"rewards/frontier_coverage_25": 0.043854419514536856,
"rewards/frontier_coverage_5": 0.043854419514536856,
"rewards/frontier_ece_reward": 0.018962536379694937,
"rewards/frontier_entropy_batch_reward": -0.1885848104953766,
"signal/accuracy_reward/centered_abs_mean": 0.143426513671875,
"signal/accuracy_reward/group_bin_occupancy": 0.1921875,
"signal/accuracy_reward/group_std_mean": 0.18838266730308534,
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0717132568359375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0717132568359375,
"signal/advantage_abs_mean": 0.10101936310529709,
"signal/advantage_pre_scale_abs_mean": 0.10101936310529709,
"signal/advantage_pre_scale_std": 0.14047700464725493,
"signal/advantage_std": 0.14047700464725493,
"signal/brier_reward/centered_abs_mean": 0.19066874384880067,
"signal/brier_reward/group_bin_occupancy": 0.9046875,
"signal/brier_reward/group_std_mean": 0.23766070902347564,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023833592981100084,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.023833592981100084,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002466377941891551,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.798046875,
"signal/frontier_aurc_reward/group_std_mean": 0.0035321788396686315,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.4148163578938696e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.4148163578938696e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22646748423576354,
"signal/frontier_coverage_1/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_1/group_std_mean": 0.2924614608287811,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_10/centered_abs_mean": 0.22646748423576354,
"signal/frontier_coverage_10/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_10/group_std_mean": 0.2924614608287811,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_15/centered_abs_mean": 0.22646748423576354,
"signal/frontier_coverage_15/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_15/group_std_mean": 0.2924614608287811,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_20/centered_abs_mean": 0.22646748423576354,
"signal/frontier_coverage_20/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_20/group_std_mean": 0.2924614608287811,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_25/centered_abs_mean": 0.22646748423576354,
"signal/frontier_coverage_25/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_25/group_std_mean": 0.2924614608287811,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_5/centered_abs_mean": 0.22646748423576354,
"signal/frontier_coverage_5/group_bin_occupancy": 0.902734375,
"signal/frontier_coverage_5/group_std_mean": 0.2924614608287811,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004053767677396536,
"signal/frontier_ece_reward/centered_abs_mean": 0.05426667183637619,
"signal/frontier_ece_reward/group_bin_occupancy": 0.887890625,
"signal/frontier_ece_reward/group_std_mean": 0.07379693686962127,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006783333979547024,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006783333979547024,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2799087405204773,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35980047583580016,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034988592565059665,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034988592565059665,
"step": 75
},
{
"calibration/aurc": 0.29483841819292744,
"calibration/batch_distribution_entropy": 0.9777187935073307,
"calibration/batch_entropy_100bins": 0.9667154925944665,
"calibration/batch_entropy_10bins": 0.9777187935073307,
"calibration/batch_entropy_50bins": 0.9757633762213546,
"calibration/batch_uniqueness": 0.9522694542284718,
"calibration/buffer_distribution_entropy": 0.9800587855542565,
"calibration/buffer_entropy_100bins": 0.9373105642982278,
"calibration/buffer_entropy_10bins": 0.9800587855542565,
"calibration/buffer_entropy_50bins": 0.958673701749106,
"calibration/confidence_entropy": 0.4694770219979284,
"calibration/coverage@0%": 0.016024798189823875,
"calibration/coverage@1%": 0.016024798189823875,
"calibration/coverage@10%": 0.12074134662426614,
"calibration/coverage@15%": 0.23877048679060664,
"calibration/coverage@20%": 0.35218704134050877,
"calibration/coverage@25%": 0.449902917074364,
"calibration/coverage@30%": 0.5347044704011742,
"calibration/coverage@5%": 0.016806048189823876,
"calibration/ece": 0.145108984662015,
"calibration/mean_confidence": 0.4939348278541888,
"calibration/prompt_uniqueness": 0.8741655522405047,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 939.4,
"completions/max_terminated_length": 736.8,
"completions/mean_length": 176.50556640625,
"completions/mean_terminated_length": 176.3722412109375,
"completions/min_length": 82.2,
"completions/min_terminated_length": 82.2,
"epoch": 0.256,
"grad_norm": 0.0011565532768145204,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 266633651.0,
"reward": 0.8428894639015198,
"reward_std": 0.11935372054576873,
"rewards/accuracy_reward": 0.51728515625,
"rewards/brier_reward": 0.7649436235427857,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_aurc_reward": -0.002899883547797799,
"rewards/frontier_coverage_1": 0.09639933593571186,
"rewards/frontier_coverage_10": 0.09639933593571186,
"rewards/frontier_coverage_15": 0.09639933593571186,
"rewards/frontier_coverage_20": 0.09639933593571186,
"rewards/frontier_coverage_25": 0.09639933593571186,
"rewards/frontier_coverage_5": 0.09639933593571186,
"rewards/frontier_ece_reward": 0.023540638387203217,
"rewards/frontier_entropy_batch_reward": -0.19457639753818512,
"signal/accuracy_reward/centered_abs_mean": 0.130194091796875,
"signal/accuracy_reward/group_bin_occupancy": 0.186328125,
"signal/accuracy_reward/group_std_mean": 0.17149793207645417,
"signal/accuracy_reward/group_zero_std_frac": 0.509375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0650970458984375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0650970458984375,
"signal/advantage_abs_mean": 0.0930386334657669,
"signal/advantage_pre_scale_abs_mean": 0.0930386334657669,
"signal/advantage_pre_scale_std": 0.13454234898090361,
"signal/advantage_std": 0.13454234898090361,
"signal/brier_reward/centered_abs_mean": 0.17940108776092528,
"signal/brier_reward/group_bin_occupancy": 0.879296875,
"signal/brier_reward/group_std_mean": 0.2269110530614853,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02242513597011566,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02242513597011566,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_bin_occupancy": 0.12734375,
"signal/format_reward/group_std_mean": 0.0033145629800856113,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024126087315380572,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.773046875,
"signal/frontier_aurc_reward/group_std_mean": 0.003546612523496151,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.318569772294722e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.318569772294722e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22456566095352173,
"signal/frontier_coverage_1/group_bin_occupancy": 0.897265625,
"signal/frontier_coverage_1/group_std_mean": 0.28588892221450807,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_10/centered_abs_mean": 0.22456566095352173,
"signal/frontier_coverage_10/group_bin_occupancy": 0.897265625,
"signal/frontier_coverage_10/group_std_mean": 0.28588892221450807,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_15/centered_abs_mean": 0.22456566095352173,
"signal/frontier_coverage_15/group_bin_occupancy": 0.897265625,
"signal/frontier_coverage_15/group_std_mean": 0.28588892221450807,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_20/centered_abs_mean": 0.22456566095352173,
"signal/frontier_coverage_20/group_bin_occupancy": 0.897265625,
"signal/frontier_coverage_20/group_std_mean": 0.28588892221450807,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_25/centered_abs_mean": 0.22456566095352173,
"signal/frontier_coverage_25/group_bin_occupancy": 0.897265625,
"signal/frontier_coverage_25/group_std_mean": 0.28588892221450807,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_5/centered_abs_mean": 0.22456566095352173,
"signal/frontier_coverage_5/group_bin_occupancy": 0.897265625,
"signal/frontier_coverage_5/group_std_mean": 0.28588892221450807,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004019725229591131,
"signal/frontier_ece_reward/centered_abs_mean": 0.050846466422080995,
"signal/frontier_ece_reward/group_bin_occupancy": 0.88046875,
"signal/frontier_ece_reward/group_std_mean": 0.06863305419683456,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.006355808302760124,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.006355808302760124,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27906052470207215,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.74765625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3593753814697266,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03488256558775902,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03488256558775902,
"step": 80
},
{
"calibration/aurc": 0.396818313806098,
"calibration/batch_distribution_entropy": 0.9904097057091761,
"calibration/batch_entropy_100bins": 0.9737173977651677,
"calibration/batch_entropy_10bins": 0.9904097057091761,
"calibration/batch_entropy_50bins": 0.9832208433275686,
"calibration/batch_uniqueness": 0.9558441162109375,
"calibration/buffer_distribution_entropy": 0.9823054908036963,
"calibration/buffer_entropy_100bins": 0.9438449914736629,
"calibration/buffer_entropy_10bins": 0.9823054908036963,
"calibration/buffer_entropy_50bins": 0.9631353066861775,
"calibration/confidence_entropy": 0.48614395905828134,
"calibration/coverage@0%": 0.010546875,
"calibration/coverage@1%": 0.010546875,
"calibration/coverage@10%": 0.047265625,
"calibration/coverage@15%": 0.06640625,
"calibration/coverage@20%": 0.16875,
"calibration/coverage@25%": 0.22734375,
"calibration/coverage@30%": 0.33125,
"calibration/coverage@5%": 0.01328125,
"calibration/ece": 0.14752605867831683,
"calibration/mean_confidence": 0.49425239867748705,
"calibration/prompt_uniqueness": 0.879052734375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 687.0,
"completions/max_terminated_length": 687.0,
"completions/mean_length": 186.03525390625,
"completions/mean_terminated_length": 186.03525390625,
"completions/min_length": 78.2,
"completions/min_terminated_length": 78.2,
"epoch": 0.272,
"grad_norm": 0.0011228998191654682,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 283504348.0,
"reward": 0.8343194007873536,
"reward_std": 0.1147305577993393,
"rewards/accuracy_reward": 0.49560546875,
"rewards/brier_reward": 0.7591516852378846,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0030454121064394713,
"rewards/frontier_coverage_1": 0.1056124085560441,
"rewards/frontier_coverage_10": 0.1056124085560441,
"rewards/frontier_coverage_15": 0.1056124085560441,
"rewards/frontier_coverage_20": 0.1056124085560441,
"rewards/frontier_coverage_25": 0.1056124085560441,
"rewards/frontier_coverage_5": 0.1056124085560441,
"rewards/frontier_ece_reward": 0.019984208419919013,
"rewards/frontier_entropy_batch_reward": -0.17652736306190492,
"signal/accuracy_reward/centered_abs_mean": 0.127191162109375,
"signal/accuracy_reward/group_bin_occupancy": 0.183984375,
"signal/accuracy_reward/group_std_mean": 0.16545215547084807,
"signal/accuracy_reward/group_zero_std_frac": 0.528125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0635955810546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0635955810546875,
"signal/advantage_abs_mean": 0.09112635999917984,
"signal/advantage_pre_scale_abs_mean": 0.09112635999917984,
"signal/advantage_pre_scale_std": 0.1298075333237648,
"signal/advantage_std": 0.1298075333237648,
"signal/brier_reward/centered_abs_mean": 0.17884210646152496,
"signal/brier_reward/group_bin_occupancy": 0.869140625,
"signal/brier_reward/group_std_mean": 0.22582717537879943,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02235526330769062,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.02235526330769062,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023218464106321336,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7765625,
"signal/frontier_aurc_reward/group_std_mean": 0.003383269626647234,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.1561049147276205e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.1561049147276205e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.22928011119365693,
"signal/frontier_coverage_1/group_bin_occupancy": 0.894921875,
"signal/frontier_coverage_1/group_std_mean": 0.2927806079387665,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_10/centered_abs_mean": 0.22928011119365693,
"signal/frontier_coverage_10/group_bin_occupancy": 0.894921875,
"signal/frontier_coverage_10/group_std_mean": 0.2927806079387665,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_15/centered_abs_mean": 0.22928011119365693,
"signal/frontier_coverage_15/group_bin_occupancy": 0.894921875,
"signal/frontier_coverage_15/group_std_mean": 0.2927806079387665,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_20/centered_abs_mean": 0.22928011119365693,
"signal/frontier_coverage_20/group_bin_occupancy": 0.894921875,
"signal/frontier_coverage_20/group_std_mean": 0.2927806079387665,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_25/centered_abs_mean": 0.22928011119365693,
"signal/frontier_coverage_25/group_bin_occupancy": 0.894921875,
"signal/frontier_coverage_25/group_std_mean": 0.2927806079387665,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_5/centered_abs_mean": 0.22928011119365693,
"signal/frontier_coverage_5/group_bin_occupancy": 0.894921875,
"signal/frontier_coverage_5/group_std_mean": 0.2927806079387665,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004104113671928644,
"signal/frontier_ece_reward/centered_abs_mean": 0.04553831294178963,
"signal/frontier_ece_reward/group_bin_occupancy": 0.86875,
"signal/frontier_ece_reward/group_std_mean": 0.0625507190823555,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005692289117723704,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005692289117723704,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.262398362159729,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34030004143714904,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032799795269966125,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032799795269966125,
"step": 85
},
{
"calibration/aurc": 0.3438739028381422,
"calibration/batch_distribution_entropy": 0.9872127090769016,
"calibration/batch_entropy_100bins": 0.9745401927062021,
"calibration/batch_entropy_10bins": 0.9872127090769016,
"calibration/batch_entropy_50bins": 0.9833082642512846,
"calibration/batch_uniqueness": 0.955206298828125,
"calibration/buffer_distribution_entropy": 0.9842534881328724,
"calibration/buffer_entropy_100bins": 0.9495188377364201,
"calibration/buffer_entropy_10bins": 0.9842534881328724,
"calibration/buffer_entropy_50bins": 0.9670404366119122,
"calibration/confidence_entropy": 0.4957282055397302,
"calibration/coverage@0%": 0.0046875,
"calibration/coverage@1%": 0.0046875,
"calibration/coverage@10%": 0.06484375,
"calibration/coverage@15%": 0.106640625,
"calibration/coverage@20%": 0.14296875,
"calibration/coverage@25%": 0.212890625,
"calibration/coverage@30%": 0.2953125,
"calibration/coverage@5%": 0.01953125,
"calibration/ece": 0.13413243536778918,
"calibration/mean_confidence": 0.5377243429204241,
"calibration/prompt_uniqueness": 0.878662109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 754.2,
"completions/max_terminated_length": 565.8,
"completions/mean_length": 182.9107421875,
"completions/mean_terminated_length": 182.64695739746094,
"completions/min_length": 80.4,
"completions/min_terminated_length": 80.4,
"epoch": 0.288,
"grad_norm": 0.0012018464040011168,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 300335530.0,
"reward": 0.8439226388931275,
"reward_std": 0.11675801277160644,
"rewards/accuracy_reward": 0.51513671875,
"rewards/brier_reward": 0.7593789458274841,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.0030923429410904648,
"rewards/frontier_coverage_1": 0.10312287509441376,
"rewards/frontier_coverage_10": 0.10312287509441376,
"rewards/frontier_coverage_15": 0.10312287509441376,
"rewards/frontier_coverage_20": 0.10312287509441376,
"rewards/frontier_coverage_25": 0.10312287509441376,
"rewards/frontier_coverage_5": 0.10312287509441376,
"rewards/frontier_ece_reward": 0.01944323191419244,
"rewards/frontier_entropy_batch_reward": -0.1730232924222946,
"signal/accuracy_reward/centered_abs_mean": 0.134979248046875,
"signal/accuracy_reward/group_bin_occupancy": 0.1890625,
"signal/accuracy_reward/group_std_mean": 0.17875251770019532,
"signal/accuracy_reward/group_zero_std_frac": 0.4875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0674896240234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0674896240234375,
"signal/advantage_abs_mean": 0.0906929224729538,
"signal/advantage_pre_scale_abs_mean": 0.0906929224729538,
"signal/advantage_pre_scale_std": 0.13175700902938842,
"signal/advantage_std": 0.13175700902938842,
"signal/brier_reward/centered_abs_mean": 0.17308151721954346,
"signal/brier_reward/group_bin_occupancy": 0.875,
"signal/brier_reward/group_std_mean": 0.2171693116426468,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021635189652442932,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.021635189652442932,
"signal/format_reward/centered_abs_mean": 0.001513671875,
"signal/format_reward/group_bin_occupancy": 0.128125,
"signal/format_reward/group_std_mean": 0.004419417353346944,
"signal/format_reward/group_zero_std_frac": 0.975,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0007568359375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002408099686726928,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.76796875,
"signal/frontier_aurc_reward/group_std_mean": 0.0035015761386603117,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.310498406994157e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.310498406994157e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.21902143955230713,
"signal/frontier_coverage_1/group_bin_occupancy": 0.885546875,
"signal/frontier_coverage_1/group_std_mean": 0.27995782494544985,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_10/centered_abs_mean": 0.21902143955230713,
"signal/frontier_coverage_10/group_bin_occupancy": 0.885546875,
"signal/frontier_coverage_10/group_std_mean": 0.27995782494544985,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_15/centered_abs_mean": 0.21902143955230713,
"signal/frontier_coverage_15/group_bin_occupancy": 0.885546875,
"signal/frontier_coverage_15/group_std_mean": 0.27995782494544985,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_20/centered_abs_mean": 0.21902143955230713,
"signal/frontier_coverage_20/group_bin_occupancy": 0.885546875,
"signal/frontier_coverage_20/group_std_mean": 0.27995782494544985,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_25/centered_abs_mean": 0.21902143955230713,
"signal/frontier_coverage_25/group_bin_occupancy": 0.885546875,
"signal/frontier_coverage_25/group_std_mean": 0.27995782494544985,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_5/centered_abs_mean": 0.21902143955230713,
"signal/frontier_coverage_5/group_bin_occupancy": 0.885546875,
"signal/frontier_coverage_5/group_std_mean": 0.27995782494544985,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003920483542606235,
"signal/frontier_ece_reward/centered_abs_mean": 0.043396206200122835,
"signal/frontier_ece_reward/group_bin_occupancy": 0.865625,
"signal/frontier_ece_reward/group_std_mean": 0.05951143801212311,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005424525775015354,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005424525775015354,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2536126673221588,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7515625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.330656635761261,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03170158341526985,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03170158341526985,
"step": 90
},
{
"calibration/aurc": 0.30520045908732824,
"calibration/batch_distribution_entropy": 0.9809410439481454,
"calibration/batch_entropy_100bins": 0.9710889487139797,
"calibration/batch_entropy_10bins": 0.9809410439481454,
"calibration/batch_entropy_50bins": 0.979682041413058,
"calibration/batch_uniqueness": 0.9534674290791777,
"calibration/buffer_distribution_entropy": 0.9852726079888707,
"calibration/buffer_entropy_100bins": 0.9542494347166681,
"calibration/buffer_entropy_10bins": 0.9852726079888707,
"calibration/buffer_entropy_50bins": 0.970168674026028,
"calibration/confidence_entropy": 0.487046948757095,
"calibration/coverage@0%": 0.00703660102739726,
"calibration/coverage@1%": 0.00703660102739726,
"calibration/coverage@10%": 0.06920177959882583,
"calibration/coverage@15%": 0.11181124633072406,
"calibration/coverage@20%": 0.20913420376712327,
"calibration/coverage@25%": 0.3654484160958904,
"calibration/coverage@30%": 0.5100178877201565,
"calibration/coverage@5%": 0.00703660102739726,
"calibration/ece": 0.11105027567237662,
"calibration/mean_confidence": 0.5574284681401743,
"calibration/prompt_uniqueness": 0.8712910948881373,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 663.8,
"completions/max_terminated_length": 663.8,
"completions/mean_length": 184.992578125,
"completions/mean_terminated_length": 184.992578125,
"completions/min_length": 77.6,
"completions/min_terminated_length": 77.6,
"epoch": 0.304,
"grad_norm": 0.0013152319006621838,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 317159806.0,
"reward": 0.8400702953338623,
"reward_std": 0.11409472972154618,
"rewards/accuracy_reward": 0.50537109375,
"rewards/brier_reward": 0.7638264536857605,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0030908068176358936,
"rewards/frontier_coverage_1": 0.1076532706618309,
"rewards/frontier_coverage_10": 0.1076532706618309,
"rewards/frontier_coverage_15": 0.1076532706618309,
"rewards/frontier_coverage_20": 0.1076532706618309,
"rewards/frontier_coverage_25": 0.1076532706618309,
"rewards/frontier_coverage_5": 0.1076532706618309,
"rewards/frontier_ece_reward": 0.019258670136332513,
"rewards/frontier_entropy_batch_reward": -0.1748884290456772,
"signal/accuracy_reward/centered_abs_mean": 0.126800537109375,
"signal/accuracy_reward/group_bin_occupancy": 0.186328125,
"signal/accuracy_reward/group_std_mean": 0.1693242758512497,
"signal/accuracy_reward/group_zero_std_frac": 0.509375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0634002685546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0634002685546875,
"signal/advantage_abs_mean": 0.08780478239059449,
"signal/advantage_pre_scale_abs_mean": 0.08780478239059449,
"signal/advantage_pre_scale_std": 0.1284501165151596,
"signal/advantage_std": 0.1284501165151596,
"signal/brier_reward/centered_abs_mean": 0.1634564906358719,
"signal/brier_reward/group_bin_occupancy": 0.869140625,
"signal/brier_reward/group_std_mean": 0.207411727309227,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020432061329483987,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.020432061329483987,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002385811135172844,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.784375,
"signal/frontier_aurc_reward/group_std_mean": 0.0034592232666909696,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2706017120508476e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2706017120508476e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20289005041122438,
"signal/frontier_coverage_1/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_1/group_std_mean": 0.26228512823581696,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_10/centered_abs_mean": 0.20289005041122438,
"signal/frontier_coverage_10/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_10/group_std_mean": 0.26228512823581696,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_15/centered_abs_mean": 0.20289005041122438,
"signal/frontier_coverage_15/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_15/group_std_mean": 0.26228512823581696,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_20/centered_abs_mean": 0.20289005041122438,
"signal/frontier_coverage_20/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_20/group_std_mean": 0.26228512823581696,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_25/centered_abs_mean": 0.20289005041122438,
"signal/frontier_coverage_25/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_25/group_std_mean": 0.26228512823581696,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_5/centered_abs_mean": 0.20289005041122438,
"signal/frontier_coverage_5/group_bin_occupancy": 0.879296875,
"signal/frontier_coverage_5/group_std_mean": 0.26228512823581696,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003631731867790222,
"signal/frontier_ece_reward/centered_abs_mean": 0.041925042122602466,
"signal/frontier_ece_reward/group_bin_occupancy": 0.84453125,
"signal/frontier_ece_reward/group_std_mean": 0.05757189467549324,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.005240630265325308,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.005240630265325308,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25924491286277773,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.754296875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3380768716335297,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032405614107847217,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032405614107847217,
"step": 95
},
{
"calibration/aurc": 0.2647507481106945,
"calibration/batch_distribution_entropy": 0.9905705255561216,
"calibration/batch_entropy_100bins": 0.9749278642951309,
"calibration/batch_entropy_10bins": 0.9905705255561216,
"calibration/batch_entropy_50bins": 0.9843588547973605,
"calibration/batch_uniqueness": 0.9554248612111863,
"calibration/buffer_distribution_entropy": 0.9859378110837035,
"calibration/buffer_entropy_100bins": 0.9583101311689097,
"calibration/buffer_entropy_10bins": 0.9859378110837035,
"calibration/buffer_entropy_50bins": 0.9726730927304548,
"calibration/confidence_entropy": 0.48981557023424893,
"calibration/coverage@0%": 0.0453491927592955,
"calibration/coverage@1%": 0.0453491927592955,
"calibration/coverage@10%": 0.24703170865949117,
"calibration/coverage@15%": 0.34710127201565555,
"calibration/coverage@20%": 0.41707207069471625,
"calibration/coverage@25%": 0.49878913894324856,
"calibration/coverage@30%": 0.572265625,
"calibration/coverage@5%": 0.09306124633072407,
"calibration/ece": 0.1539305622574378,
"calibration/mean_confidence": 0.5290979156854486,
"calibration/prompt_uniqueness": 0.8721267620805151,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 890.6,
"completions/max_terminated_length": 481.2,
"completions/mean_length": 185.70908203125,
"completions/mean_terminated_length": 185.3141632080078,
"completions/min_length": 92.6,
"completions/min_terminated_length": 92.6,
"epoch": 0.32,
"grad_norm": 0.0009060048614628613,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 334150171.0,
"reward": 0.8524771690368652,
"reward_std": 0.09913994669914246,
"rewards/accuracy_reward": 0.5279296875,
"rewards/brier_reward": 0.7727201581001282,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.00271282319445163,
"rewards/frontier_coverage_1": 0.09441364500671626,
"rewards/frontier_coverage_10": 0.09441364500671626,
"rewards/frontier_coverage_15": 0.09441364500671626,
"rewards/frontier_coverage_20": 0.09441364500671626,
"rewards/frontier_coverage_25": 0.09441364500671626,
"rewards/frontier_coverage_5": 0.09441364500671626,
"rewards/frontier_ece_reward": 0.02004805374890566,
"rewards/frontier_entropy_batch_reward": -0.16383886635303496,
"signal/accuracy_reward/centered_abs_mean": 0.0960205078125,
"signal/accuracy_reward/group_bin_occupancy": 0.178125,
"signal/accuracy_reward/group_std_mean": 0.13594979792833328,
"signal/accuracy_reward/group_zero_std_frac": 0.575,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04801025390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04801025390625,
"signal/advantage_abs_mean": 0.07516542375087738,
"signal/advantage_pre_scale_abs_mean": 0.07516542375087738,
"signal/advantage_pre_scale_std": 0.11188042908906937,
"signal/advantage_std": 0.11188042908906937,
"signal/brier_reward/centered_abs_mean": 0.15651972889900206,
"signal/brier_reward/group_bin_occupancy": 0.848828125,
"signal/brier_reward/group_std_mean": 0.20049535632133483,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019564966112375258,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019564966112375258,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002122000069357455,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.769921875,
"signal/frontier_aurc_reward/group_std_mean": 0.003134680772200227,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.79838005756028e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.79838005756028e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19597499668598176,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_1/group_std_mean": 0.25266251862049105,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_10/centered_abs_mean": 0.19597499668598176,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_10/group_std_mean": 0.25266251862049105,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_15/centered_abs_mean": 0.19597499668598176,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_15/group_std_mean": 0.25266251862049105,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_20/centered_abs_mean": 0.19597499668598176,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_20/group_std_mean": 0.25266251862049105,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_25/centered_abs_mean": 0.19597499668598176,
"signal/frontier_coverage_25/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_25/group_std_mean": 0.25266251862049105,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_5/centered_abs_mean": 0.19597499668598176,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_5/group_std_mean": 0.25266251862049105,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035079522524029015,
"signal/frontier_ece_reward/centered_abs_mean": 0.038548742234706876,
"signal/frontier_ece_reward/group_bin_occupancy": 0.83828125,
"signal/frontier_ece_reward/group_std_mean": 0.05287352129817009,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0048185927793383595,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0048185927793383595,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24952644407749175,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740234375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32922094464302065,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03119080550968647,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03119080550968647,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.5577346842025488,
"eval_calibration/batch_distribution_entropy": 0.9293753890186642,
"eval_calibration/batch_entropy_100bins": 0.7052278361140917,
"eval_calibration/batch_entropy_10bins": 0.9293753890186642,
"eval_calibration/batch_entropy_50bins": 0.7897002923025568,
"eval_calibration/batch_uniqueness": 0.8984375,
"eval_calibration/buffer_distribution_entropy": 0.9865955502836785,
"eval_calibration/buffer_entropy_100bins": 0.9606136359667189,
"eval_calibration/buffer_entropy_10bins": 0.9865955502836785,
"eval_calibration/buffer_entropy_50bins": 0.9741520355969424,
"eval_calibration/confidence_entropy": 0.49245299957964217,
"eval_calibration/coverage@0%": 0.046875,
"eval_calibration/coverage@1%": 0.046875,
"eval_calibration/coverage@10%": 0.046875,
"eval_calibration/coverage@15%": 0.046875,
"eval_calibration/coverage@20%": 0.046875,
"eval_calibration/coverage@25%": 0.0625,
"eval_calibration/coverage@30%": 0.0625,
"eval_calibration/coverage@5%": 0.046875,
"eval_calibration/ece": 0.2602928906962808,
"eval_calibration/mean_confidence": 0.4507762260644716,
"eval_calibration/prompt_uniqueness": 0.8984375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 387.5,
"eval_completions/max_terminated_length": 387.5,
"eval_completions/mean_length": 186.24878692626953,
"eval_completions/mean_terminated_length": 186.24878692626953,
"eval_completions/min_length": 94.5,
"eval_completions/min_terminated_length": 94.5,
"eval_loss": 0.0,
"eval_num_tokens": 334150171.0,
"eval_reward": 0.6920952647924423,
"eval_reward_std": 0.21343515813350677,
"eval_rewards/accuracy_reward": 0.400390625,
"eval_rewards/brier_reward": 0.76617431640625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0032768649398349226,
"eval_rewards/frontier_coverage_1": 0.17960688099265099,
"eval_rewards/frontier_coverage_10": 0.17960688099265099,
"eval_rewards/frontier_coverage_15": 0.17960688099265099,
"eval_rewards/frontier_coverage_20": 0.17960688099265099,
"eval_rewards/frontier_coverage_25": 0.17960688099265099,
"eval_rewards/frontier_coverage_5": 0.17960688099265099,
"eval_rewards/frontier_ece_reward": 0.015176349552348256,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 20.5399,
"eval_samples_per_second": 24.343,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4666748046875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49022945761680603,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23333740234375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23333740234375,
"eval_signal/advantage_abs_mean": 0.19233160465955734,
"eval_signal/advantage_pre_scale_abs_mean": 0.19233160465955734,
"eval_signal/advantage_pre_scale_std": 0.21115415170788765,
"eval_signal/advantage_std": 0.21115415170788765,
"eval_signal/brier_reward/centered_abs_mean": 0.20825786143541336,
"eval_signal/brier_reward/group_bin_occupancy": 0.9296875,
"eval_signal/brier_reward/group_std_mean": 0.2583780698478222,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02603223267942667,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02603223267942667,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.002897722239140421,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.8359375,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.0046058918233029544,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.186922862776555e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.186922862776555e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.39053118973970413,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_1/group_std_mean": 0.47852831333875656,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.39053118973970413,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_10/group_std_mean": 0.47852831333875656,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.39053118973970413,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.47852831333875656,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.39053118973970413,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.47852831333875656,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.39053118973970413,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.47852831333875656,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.39053118973970413,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_5/group_std_mean": 0.47852831333875656,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0069905080599710345,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.03973545506596565,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.7734375,
"eval_signal/frontier_ece_reward/group_std_mean": 0.06233951635658741,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0049669318832457066,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0049669318832457066,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.195,
"step": 100
},
{
"calibration/aurc": 0.3080561844853652,
"calibration/batch_distribution_entropy": 0.9775665099409345,
"calibration/batch_entropy_100bins": 0.9680939529103568,
"calibration/batch_entropy_10bins": 0.9775665099409345,
"calibration/batch_entropy_50bins": 0.9760559175224444,
"calibration/batch_uniqueness": 0.953204345703125,
"calibration/buffer_distribution_entropy": 0.9886758278700819,
"calibration/buffer_entropy_100bins": 0.9646465788301135,
"calibration/buffer_entropy_10bins": 0.9886758278700819,
"calibration/buffer_entropy_50bins": 0.9770686160222896,
"calibration/confidence_entropy": 0.5016109504275073,
"calibration/coverage@0%": 0.005078125,
"calibration/coverage@1%": 0.005078125,
"calibration/coverage@10%": 0.04765625,
"calibration/coverage@15%": 0.087109375,
"calibration/coverage@20%": 0.164453125,
"calibration/coverage@25%": 0.394921875,
"calibration/coverage@30%": 0.550390625,
"calibration/coverage@5%": 0.005078125,
"calibration/ece": 0.14772412802918822,
"calibration/mean_confidence": 0.44700384713572605,
"calibration/prompt_uniqueness": 0.86708984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 693.6,
"completions/max_terminated_length": 462.4,
"completions/mean_length": 185.40810546875,
"completions/mean_terminated_length": 185.27640075683593,
"completions/min_length": 85.0,
"completions/min_terminated_length": 85.0,
"epoch": 0.336,
"grad_norm": 0.0010472203139215708,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 350771182.0,
"reward": 0.8507241845130921,
"reward_std": 0.10451295822858811,
"rewards/accuracy_reward": 0.53642578125,
"rewards/brier_reward": 0.7645717978477478,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0025647633709013464,
"rewards/frontier_coverage_1": 0.0841044221073389,
"rewards/frontier_coverage_10": 0.0841044221073389,
"rewards/frontier_coverage_15": 0.0841044221073389,
"rewards/frontier_coverage_20": 0.0841044221073389,
"rewards/frontier_coverage_25": 0.0841044221073389,
"rewards/frontier_coverage_5": 0.0841044221073389,
"rewards/frontier_ece_reward": 0.017082036286592484,
"rewards/frontier_entropy_batch_reward": -0.1930681586265564,
"signal/accuracy_reward/centered_abs_mean": 0.114898681640625,
"signal/accuracy_reward/group_bin_occupancy": 0.18046875,
"signal/accuracy_reward/group_std_mean": 0.1537907287478447,
"signal/accuracy_reward/group_zero_std_frac": 0.55625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0574493408203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0574493408203125,
"signal/advantage_abs_mean": 0.08221976161003113,
"signal/advantage_pre_scale_abs_mean": 0.08221976161003113,
"signal/advantage_pre_scale_std": 0.11908840835094452,
"signal/advantage_std": 0.11908840835094452,
"signal/brier_reward/centered_abs_mean": 0.15783025622367858,
"signal/brier_reward/group_bin_occupancy": 0.868359375,
"signal/brier_reward/group_std_mean": 0.1996555894613266,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.019728782027959822,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.019728782027959822,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.001953614945523441,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.771875,
"signal/frontier_aurc_reward/group_std_mean": 0.0028803437016904354,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.496970675769262e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.496970675769262e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20813391208648682,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_1/group_std_mean": 0.2666136801242828,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_10/centered_abs_mean": 0.20813391208648682,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_10/group_std_mean": 0.2666136801242828,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_15/centered_abs_mean": 0.20813391208648682,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_15/group_std_mean": 0.2666136801242828,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_20/centered_abs_mean": 0.20813391208648682,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_20/group_std_mean": 0.2666136801242828,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_25/centered_abs_mean": 0.20813391208648682,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_25/group_std_mean": 0.2666136801242828,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_5/centered_abs_mean": 0.20813391208648682,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8890625,
"signal/frontier_coverage_5/group_std_mean": 0.2666136801242828,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003725596936419606,
"signal/frontier_ece_reward/centered_abs_mean": 0.033592797070741656,
"signal/frontier_ece_reward/group_bin_occupancy": 0.826171875,
"signal/frontier_ece_reward/group_std_mean": 0.0477225124835968,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.004199099633842707,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.004199099633842707,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27311921715736387,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35031378269195557,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034139902144670484,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034139902144670484,
"step": 105
},
{
"calibration/aurc": 0.3112573154501758,
"calibration/batch_distribution_entropy": 0.9691682173397554,
"calibration/batch_entropy_100bins": 0.9620743358290158,
"calibration/batch_entropy_10bins": 0.9691682173397554,
"calibration/batch_entropy_50bins": 0.968651729542121,
"calibration/batch_uniqueness": 0.9505657025562766,
"calibration/buffer_distribution_entropy": 0.9942777995898278,
"calibration/buffer_entropy_100bins": 0.9773292841249249,
"calibration/buffer_entropy_10bins": 0.9942777995898278,
"calibration/buffer_entropy_50bins": 0.9859843966922479,
"calibration/confidence_entropy": 0.47586445065632177,
"calibration/coverage@0%": 0.03093057514101531,
"calibration/coverage@1%": 0.03093057514101531,
"calibration/coverage@10%": 0.19166808761175705,
"calibration/coverage@15%": 0.32717660608764054,
"calibration/coverage@20%": 0.3968004903159894,
"calibration/coverage@25%": 0.4562750224233529,
"calibration/coverage@30%": 0.5329890749107863,
"calibration/coverage@5%": 0.09618102720540271,
"calibration/ece": 0.1232984263006351,
"calibration/mean_confidence": 0.44606582673910006,
"calibration/prompt_uniqueness": 0.8618793594725546,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 982.8,
"completions/max_terminated_length": 697.4,
"completions/mean_length": 189.2787109375,
"completions/mean_terminated_length": 188.88477478027343,
"completions/min_length": 89.0,
"completions/min_terminated_length": 89.0,
"epoch": 0.352,
"grad_norm": 0.0011104086879640818,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 367969812.0,
"reward": 0.8267355799674988,
"reward_std": 0.10339633971452714,
"rewards/accuracy_reward": 0.4732421875,
"rewards/brier_reward": 0.7793355941772461,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0028707799036055805,
"rewards/frontier_coverage_1": 0.13734577894210814,
"rewards/frontier_coverage_10": 0.13734577894210814,
"rewards/frontier_coverage_15": 0.13734577894210814,
"rewards/frontier_coverage_20": 0.13734577894210814,
"rewards/frontier_coverage_25": 0.13734577894210814,
"rewards/frontier_coverage_5": 0.13734577894210814,
"rewards/frontier_ece_reward": 0.01663174610584974,
"rewards/frontier_entropy_batch_reward": -0.19108545184135436,
"signal/accuracy_reward/centered_abs_mean": 0.10980224609375,
"signal/accuracy_reward/group_bin_occupancy": 0.1765625,
"signal/accuracy_reward/group_std_mean": 0.14368323981761932,
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.054901123046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.054901123046875,
"signal/advantage_abs_mean": 0.08111239075660706,
"signal/advantage_pre_scale_abs_mean": 0.08111239075660706,
"signal/advantage_pre_scale_std": 0.1185634657740593,
"signal/advantage_std": 0.1185634657740593,
"signal/brier_reward/centered_abs_mean": 0.15005984008312226,
"signal/brier_reward/group_bin_occupancy": 0.854296875,
"signal/brier_reward/group_std_mean": 0.19159983992576599,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018757480010390282,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018757480010390282,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0021185804391279815,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7625,
"signal/frontier_aurc_reward/group_std_mean": 0.0031617959029972553,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.792258794419467e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.792258794419467e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.20387679040431977,
"signal/frontier_coverage_1/group_bin_occupancy": 0.888671875,
"signal/frontier_coverage_1/group_std_mean": 0.2581924706697464,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_10/centered_abs_mean": 0.20387679040431977,
"signal/frontier_coverage_10/group_bin_occupancy": 0.888671875,
"signal/frontier_coverage_10/group_std_mean": 0.2581924706697464,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_15/centered_abs_mean": 0.20387679040431977,
"signal/frontier_coverage_15/group_bin_occupancy": 0.888671875,
"signal/frontier_coverage_15/group_std_mean": 0.2581924706697464,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_20/centered_abs_mean": 0.20387679040431977,
"signal/frontier_coverage_20/group_bin_occupancy": 0.888671875,
"signal/frontier_coverage_20/group_std_mean": 0.2581924706697464,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_25/centered_abs_mean": 0.20387679040431977,
"signal/frontier_coverage_25/group_bin_occupancy": 0.888671875,
"signal/frontier_coverage_25/group_std_mean": 0.2581924706697464,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_5/centered_abs_mean": 0.20387679040431977,
"signal/frontier_coverage_5/group_bin_occupancy": 0.888671875,
"signal/frontier_coverage_5/group_std_mean": 0.2581924706697464,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003649394493550062,
"signal/frontier_ece_reward/centered_abs_mean": 0.030297876521945,
"signal/frontier_ece_reward/group_bin_occupancy": 0.841015625,
"signal/frontier_ece_reward/group_std_mean": 0.04242234602570534,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.003787234565243125,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.003787234565243125,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2668303608894348,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744140625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34631708860397337,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03335379511117935,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03335379511117935,
"step": 110
},
{
"calibration/aurc": 0.3787179681778446,
"calibration/batch_distribution_entropy": 0.9810928602094272,
"calibration/batch_entropy_100bins": 0.9679847491523969,
"calibration/batch_entropy_10bins": 0.9810928602094272,
"calibration/batch_entropy_50bins": 0.9765758073335764,
"calibration/batch_uniqueness": 0.9527984619140625,
"calibration/buffer_distribution_entropy": 0.9977757450664437,
"calibration/buffer_entropy_100bins": 0.9878115860267597,
"calibration/buffer_entropy_10bins": 0.9977757450664437,
"calibration/buffer_entropy_50bins": 0.992834295234476,
"calibration/confidence_entropy": 0.4810720543670087,
"calibration/coverage@0%": 0.00625,
"calibration/coverage@1%": 0.00625,
"calibration/coverage@10%": 0.044921875,
"calibration/coverage@15%": 0.07109375,
"calibration/coverage@20%": 0.137109375,
"calibration/coverage@25%": 0.33671875,
"calibration/coverage@30%": 0.45859375,
"calibration/coverage@5%": 0.0140625,
"calibration/ece": 0.141736657577259,
"calibration/mean_confidence": 0.5299977859351189,
"calibration/prompt_uniqueness": 0.8634765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 894.6,
"completions/max_terminated_length": 514.8,
"completions/mean_length": 190.746875,
"completions/mean_terminated_length": 190.48418273925782,
"completions/min_length": 90.2,
"completions/min_terminated_length": 90.2,
"epoch": 0.368,
"grad_norm": 0.0009830680210143328,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 384988532.0,
"reward": 0.8328436970710754,
"reward_std": 0.10252733081579209,
"rewards/accuracy_reward": 0.49130859375,
"rewards/brier_reward": 0.774866783618927,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0033453899435698987,
"rewards/frontier_coverage_1": 0.1207592561841011,
"rewards/frontier_coverage_10": 0.1207592561841011,
"rewards/frontier_coverage_15": 0.1207592561841011,
"rewards/frontier_coverage_20": 0.1207592561841011,
"rewards/frontier_coverage_25": 0.1207592561841011,
"rewards/frontier_coverage_5": 0.1207592561841011,
"rewards/frontier_ece_reward": 0.015102808736264706,
"rewards/frontier_entropy_batch_reward": -0.19416911602020265,
"signal/accuracy_reward/centered_abs_mean": 0.107684326171875,
"signal/accuracy_reward/group_bin_occupancy": 0.180078125,
"signal/accuracy_reward/group_std_mean": 0.14609776586294174,
"signal/accuracy_reward/group_zero_std_frac": 0.559375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0538421630859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0538421630859375,
"signal/advantage_abs_mean": 0.07987660020589829,
"signal/advantage_pre_scale_abs_mean": 0.07987660020589829,
"signal/advantage_pre_scale_std": 0.11792214959859848,
"signal/advantage_std": 0.11792214959859848,
"signal/brier_reward/centered_abs_mean": 0.14661412835121154,
"signal/brier_reward/group_bin_occupancy": 0.866796875,
"signal/brier_reward/group_std_mean": 0.18781245350837708,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018326766043901443,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.018326766043901443,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028235503938049077,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.773046875,
"signal/frontier_aurc_reward/group_std_mean": 0.004136141994968057,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.054155117250048e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.054155117250048e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1856350988149643,
"signal/frontier_coverage_1/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_1/group_std_mean": 0.23784518837928773,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_10/centered_abs_mean": 0.1856350988149643,
"signal/frontier_coverage_10/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_10/group_std_mean": 0.23784518837928773,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_15/centered_abs_mean": 0.1856350988149643,
"signal/frontier_coverage_15/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_15/group_std_mean": 0.23784518837928773,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_20/centered_abs_mean": 0.1856350988149643,
"signal/frontier_coverage_20/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_20/group_std_mean": 0.23784518837928773,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_25/centered_abs_mean": 0.1856350988149643,
"signal/frontier_coverage_25/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_25/group_std_mean": 0.23784518837928773,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_5/centered_abs_mean": 0.1856350988149643,
"signal/frontier_coverage_5/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_5/group_std_mean": 0.23784518837928773,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00332286823540926,
"signal/frontier_ece_reward/centered_abs_mean": 0.029052532091736794,
"signal/frontier_ece_reward/group_bin_occupancy": 0.865234375,
"signal/frontier_ece_reward/group_std_mean": 0.03975553885102272,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0036315665114670993,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0036315665114670993,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2735773980617523,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35312792658805847,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03419717475771904,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03419717475771904,
"step": 115
},
{
"calibration/aurc": 0.3436694397661129,
"calibration/batch_distribution_entropy": 0.9729021229483618,
"calibration/batch_entropy_100bins": 0.9666308319358716,
"calibration/batch_entropy_10bins": 0.9729021229483618,
"calibration/batch_entropy_50bins": 0.9741625269875778,
"calibration/batch_uniqueness": 0.9517198658975239,
"calibration/buffer_distribution_entropy": 0.9991215376397509,
"calibration/buffer_entropy_100bins": 0.9948472735016413,
"calibration/buffer_entropy_10bins": 0.9991215376397509,
"calibration/buffer_entropy_50bins": 0.9970063375341864,
"calibration/confidence_entropy": 0.459402213935696,
"calibration/coverage@0%": 0.019553418542074364,
"calibration/coverage@1%": 0.019553418542074364,
"calibration/coverage@10%": 0.1654216609589041,
"calibration/coverage@15%": 0.2588284307729941,
"calibration/coverage@20%": 0.295582344667319,
"calibration/coverage@25%": 0.34835952788649704,
"calibration/coverage@30%": 0.4058150379158512,
"calibration/coverage@5%": 0.09621147260273973,
"calibration/ece": 0.14823283473979842,
"calibration/mean_confidence": 0.46903977021675436,
"calibration/prompt_uniqueness": 0.8525091355846774,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1344.8,
"completions/max_terminated_length": 674.6,
"completions/mean_length": 192.716796875,
"completions/mean_terminated_length": 192.05948486328126,
"completions/min_length": 96.8,
"completions/min_terminated_length": 96.8,
"epoch": 0.384,
"grad_norm": 0.0008304574876092374,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 401818464.0,
"reward": 0.8498589992523193,
"reward_std": 0.10006450712680817,
"rewards/accuracy_reward": 0.526953125,
"rewards/brier_reward": 0.7873589873313904,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_aurc_reward": -0.003031095629557967,
"rewards/frontier_coverage_1": 0.11328938379883766,
"rewards/frontier_coverage_10": 0.11328938379883766,
"rewards/frontier_coverage_15": 0.11328938379883766,
"rewards/frontier_coverage_20": 0.11328938379883766,
"rewards/frontier_coverage_25": 0.11328938379883766,
"rewards/frontier_coverage_5": 0.11328938379883766,
"rewards/frontier_ece_reward": 0.017132452875375747,
"rewards/frontier_entropy_batch_reward": -0.2076016277074814,
"signal/accuracy_reward/centered_abs_mean": 0.10501708984375,
"signal/accuracy_reward/group_bin_occupancy": 0.177734375,
"signal/accuracy_reward/group_std_mean": 0.14193961024284363,
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052508544921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.052508544921875,
"signal/advantage_abs_mean": 0.07748562693595887,
"signal/advantage_pre_scale_abs_mean": 0.07748562693595887,
"signal/advantage_pre_scale_std": 0.11569896936416627,
"signal/advantage_std": 0.11569896936416627,
"signal/brier_reward/centered_abs_mean": 0.13543253839015962,
"signal/brier_reward/group_bin_occupancy": 0.837890625,
"signal/brier_reward/group_std_mean": 0.17658950984477997,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016929067298769952,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016929067298769952,
"signal/format_reward/centered_abs_mean": 0.001324462890625,
"signal/format_reward/group_bin_occupancy": 0.127734375,
"signal/format_reward/group_std_mean": 0.003866990143433213,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0030818260740488766,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.763671875,
"signal/frontier_aurc_reward/group_std_mean": 0.004590557329356671,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.5164685181807724e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.5164685181807724e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17714880108833314,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_1/group_std_mean": 0.22984228730201722,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_10/centered_abs_mean": 0.17714880108833314,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_10/group_std_mean": 0.22984228730201722,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_15/centered_abs_mean": 0.17714880108833314,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_15/group_std_mean": 0.22984228730201722,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_20/centered_abs_mean": 0.17714880108833314,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_20/group_std_mean": 0.22984228730201722,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_25/centered_abs_mean": 0.17714880108833314,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_25/group_std_mean": 0.22984228730201722,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_5/centered_abs_mean": 0.17714880108833314,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_5/group_std_mean": 0.22984228730201722,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031709634698927403,
"signal/frontier_ece_reward/centered_abs_mean": 0.026306905224919318,
"signal/frontier_ece_reward/group_bin_occupancy": 0.859375,
"signal/frontier_ece_reward/group_std_mean": 0.03540766686201095,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0032883631531149147,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0032883631531149147,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27145218253135683,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34705948233604433,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033931522816419604,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033931522816419604,
"step": 120
},
{
"calibration/aurc": 0.4289352146594846,
"calibration/batch_distribution_entropy": 0.9840787382808802,
"calibration/batch_entropy_100bins": 0.9694645908187793,
"calibration/batch_entropy_10bins": 0.9840787382808802,
"calibration/batch_entropy_50bins": 0.9801606203621676,
"calibration/batch_uniqueness": 0.9542595829124529,
"calibration/buffer_distribution_entropy": 0.9990103579945624,
"calibration/buffer_entropy_100bins": 0.9979487473823087,
"calibration/buffer_entropy_10bins": 0.9990103579945624,
"calibration/buffer_entropy_50bins": 0.9985370679148613,
"calibration/confidence_entropy": 0.4909405142009618,
"calibration/coverage@0%": 0.003907014432485323,
"calibration/coverage@1%": 0.003907014432485323,
"calibration/coverage@10%": 0.003907014432485323,
"calibration/coverage@15%": 0.007422639432485323,
"calibration/coverage@20%": 0.016407014432485323,
"calibration/coverage@25%": 0.10820388943248531,
"calibration/coverage@30%": 0.1953132644324853,
"calibration/coverage@5%": 0.003907014432485323,
"calibration/ece": 0.17591819275681503,
"calibration/mean_confidence": 0.5040096555151118,
"calibration/prompt_uniqueness": 0.8661961518763007,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1150.2,
"completions/max_terminated_length": 636.6,
"completions/mean_length": 193.98671875,
"completions/mean_terminated_length": 193.59335632324218,
"completions/min_length": 97.6,
"completions/min_terminated_length": 97.6,
"epoch": 0.4,
"grad_norm": 0.00110912777017802,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 418841336.0,
"reward": 0.8370228409767151,
"reward_std": 0.10723992139101028,
"rewards/accuracy_reward": 0.50869140625,
"rewards/brier_reward": 0.7669232487678528,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.003695770213380456,
"rewards/frontier_coverage_1": 0.1053567928262055,
"rewards/frontier_coverage_10": 0.1053567928262055,
"rewards/frontier_coverage_15": 0.1053567928262055,
"rewards/frontier_coverage_20": 0.1053567928262055,
"rewards/frontier_coverage_25": 0.1053567928262055,
"rewards/frontier_coverage_5": 0.1053567928262055,
"rewards/frontier_ece_reward": 0.012649891711771489,
"rewards/frontier_entropy_batch_reward": -0.20658698678016663,
"signal/accuracy_reward/centered_abs_mean": 0.121722412109375,
"signal/accuracy_reward/group_bin_occupancy": 0.180859375,
"signal/accuracy_reward/group_std_mean": 0.15902018547058105,
"signal/accuracy_reward/group_zero_std_frac": 0.553125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0608612060546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0608612060546875,
"signal/advantage_abs_mean": 0.08474465608596801,
"signal/advantage_pre_scale_abs_mean": 0.08474465608596801,
"signal/advantage_pre_scale_std": 0.12552352696657182,
"signal/advantage_std": 0.12552352696657182,
"signal/brier_reward/centered_abs_mean": 0.14580391943454743,
"signal/brier_reward/group_bin_occupancy": 0.859765625,
"signal/brier_reward/group_std_mean": 0.18698894679546357,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01822548992931843,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01822548992931843,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003800245560705662,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625,
"signal/frontier_aurc_reward/group_std_mean": 0.005965401232242584,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 6.80243938404601e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 6.80243938404601e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1798011213541031,
"signal/frontier_coverage_1/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_1/group_std_mean": 0.23461248278617858,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_10/centered_abs_mean": 0.1798011213541031,
"signal/frontier_coverage_10/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_10/group_std_mean": 0.23461248278617858,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_15/centered_abs_mean": 0.1798011213541031,
"signal/frontier_coverage_15/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_15/group_std_mean": 0.23461248278617858,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_20/centered_abs_mean": 0.1798011213541031,
"signal/frontier_coverage_20/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_20/group_std_mean": 0.23461248278617858,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_25/centered_abs_mean": 0.1798011213541031,
"signal/frontier_coverage_25/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_25/group_std_mean": 0.23461248278617858,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_5/centered_abs_mean": 0.1798011213541031,
"signal/frontier_coverage_5/group_bin_occupancy": 0.862890625,
"signal/frontier_coverage_5/group_std_mean": 0.23461248278617858,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032184398733079433,
"signal/frontier_ece_reward/centered_abs_mean": 0.022467482089996337,
"signal/frontier_ece_reward/group_bin_occupancy": 0.891015625,
"signal/frontier_ece_reward/group_std_mean": 0.02992837503552437,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.002808435261249542,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.002808435261249542,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2770455002784729,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73203125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35210344195365906,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034630687534809114,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034630687534809114,
"step": 125
},
{
"calibration/aurc": 0.31638217177441763,
"calibration/batch_distribution_entropy": 0.9742986092431437,
"calibration/batch_entropy_100bins": 0.9684802374319121,
"calibration/batch_entropy_10bins": 0.9742986092431437,
"calibration/batch_entropy_50bins": 0.9745079873131764,
"calibration/batch_uniqueness": 0.9527449993937459,
"calibration/buffer_distribution_entropy": 0.99893326457422,
"calibration/buffer_entropy_100bins": 0.9988161195947732,
"calibration/buffer_entropy_10bins": 0.99893326457422,
"calibration/buffer_entropy_50bins": 0.9989688448398335,
"calibration/confidence_entropy": 0.5050498236628885,
"calibration/coverage@0%": 0.01171875,
"calibration/coverage@1%": 0.01171875,
"calibration/coverage@10%": 0.045703125,
"calibration/coverage@15%": 0.071484375,
"calibration/coverage@20%": 0.15277641878669276,
"calibration/coverage@25%": 0.27740566903131114,
"calibration/coverage@30%": 0.4689112952544031,
"calibration/coverage@5%": 0.014453125,
"calibration/ece": 0.09948484508211022,
"calibration/mean_confidence": 0.5181539975937481,
"calibration/prompt_uniqueness": 0.8683413883649844,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 1142.0,
"completions/max_terminated_length": 758.0,
"completions/mean_length": 196.4564453125,
"completions/mean_terminated_length": 195.80313110351562,
"completions/min_length": 101.2,
"completions/min_terminated_length": 101.2,
"epoch": 0.416,
"grad_norm": 0.0009177210740745068,
"learning_rate": 1e-06,
"loss": 0.0013,
"num_tokens": 435734234.0,
"reward": 0.845232892036438,
"reward_std": 0.10218746364116668,
"rewards/accuracy_reward": 0.5146484375,
"rewards/brier_reward": 0.7825942277908325,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_aurc_reward": -0.0030067469459027054,
"rewards/frontier_coverage_1": 0.10945327430963517,
"rewards/frontier_coverage_10": 0.10945327430963517,
"rewards/frontier_coverage_15": 0.10945327430963517,
"rewards/frontier_coverage_20": 0.10945327430963517,
"rewards/frontier_coverage_25": 0.10938042849302292,
"rewards/frontier_coverage_5": 0.10945327430963517,
"rewards/frontier_ece_reward": 0.012254784442484378,
"rewards/frontier_entropy_batch_reward": -0.18322778046131133,
"signal/accuracy_reward/centered_abs_mean": 0.111279296875,
"signal/accuracy_reward/group_bin_occupancy": 0.177734375,
"signal/accuracy_reward/group_std_mean": 0.14699049890041352,
"signal/accuracy_reward/group_zero_std_frac": 0.578125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0556396484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0556396484375,
"signal/advantage_abs_mean": 0.07910501658916473,
"signal/advantage_pre_scale_abs_mean": 0.07910501658916473,
"signal/advantage_pre_scale_std": 0.11764014065265656,
"signal/advantage_std": 0.11764014065265656,
"signal/brier_reward/centered_abs_mean": 0.14102001786231994,
"signal/brier_reward/group_bin_occupancy": 0.85859375,
"signal/brier_reward/group_std_mean": 0.18009372055530548,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017627502232789992,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.017627502232789992,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_bin_occupancy": 0.126953125,
"signal/format_reward/group_std_mean": 0.002762135770171881,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032022904139012097,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73828125,
"signal/frontier_aurc_reward/group_std_mean": 0.005255010444670916,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.7320995983900504e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.7320995983900504e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.18824252784252166,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_1/group_std_mean": 0.23771241903305054,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_10/centered_abs_mean": 0.18824252784252166,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_10/group_std_mean": 0.23771241903305054,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_15/centered_abs_mean": 0.18824252784252166,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_15/group_std_mean": 0.23771241903305054,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_20/centered_abs_mean": 0.18824252784252166,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_20/group_std_mean": 0.23771241903305054,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_25/centered_abs_mean": 0.18796592950820923,
"signal/frontier_coverage_25/group_bin_occupancy": 0.886328125,
"signal/frontier_coverage_25/group_std_mean": 0.23737676739692687,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003364589996635914,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003364589996635914,
"signal/frontier_coverage_5/centered_abs_mean": 0.18824252784252166,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8859375,
"signal/frontier_coverage_5/group_std_mean": 0.23771241903305054,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003369541047140956,
"signal/frontier_ece_reward/centered_abs_mean": 0.01897584684193134,
"signal/frontier_ece_reward/group_bin_occupancy": 0.905078125,
"signal/frontier_ece_reward/group_std_mean": 0.0248013224452734,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0023719808552414177,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0023719808552414177,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.258677664399147,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73828125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3334538578987122,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03233470804989338,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03233470804989338,
"step": 130
},
{
"calibration/aurc": 0.27110627824789024,
"calibration/batch_distribution_entropy": 0.9834757676207486,
"calibration/batch_entropy_100bins": 0.9703422906787201,
"calibration/batch_entropy_10bins": 0.9834757676207486,
"calibration/batch_entropy_50bins": 0.9793241316918639,
"calibration/batch_uniqueness": 0.954693603515625,
"calibration/buffer_distribution_entropy": 0.9993392738307401,
"calibration/buffer_entropy_100bins": 0.9991317727363503,
"calibration/buffer_entropy_10bins": 0.9993392738307401,
"calibration/buffer_entropy_50bins": 0.9992819152374992,
"calibration/confidence_entropy": 0.47524143822844167,
"calibration/coverage@0%": 0.016796875,
"calibration/coverage@1%": 0.016796875,
"calibration/coverage@10%": 0.074609375,
"calibration/coverage@15%": 0.255078125,
"calibration/coverage@20%": 0.341796875,
"calibration/coverage@25%": 0.423828125,
"calibration/coverage@30%": 0.580078125,
"calibration/coverage@5%": 0.021484375,
"calibration/ece": 0.11594602895950101,
"calibration/mean_confidence": 0.5297345627242726,
"calibration/prompt_uniqueness": 0.85546875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 903.6,
"completions/max_terminated_length": 518.6,
"completions/mean_length": 197.63076171875,
"completions/mean_terminated_length": 197.3692840576172,
"completions/min_length": 99.8,
"completions/min_terminated_length": 99.8,
"epoch": 0.432,
"grad_norm": 0.0010740357683971524,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 452772309.0,
"reward": 0.8608399510383606,
"reward_std": 0.09858821481466293,
"rewards/accuracy_reward": 0.54716796875,
"rewards/brier_reward": 0.7949665904045105,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002391556603834033,
"rewards/frontier_coverage_1": 0.10390491709113121,
"rewards/frontier_coverage_10": 0.10390475168824195,
"rewards/frontier_coverage_15": 0.10390453487634659,
"rewards/frontier_coverage_20": 0.10388994812965394,
"rewards/frontier_coverage_25": 0.10309707075357437,
"rewards/frontier_coverage_5": 0.10390491709113121,
"rewards/frontier_ece_reward": 0.012549491226673126,
"rewards/frontier_entropy_batch_reward": -0.1975017488002777,
"signal/accuracy_reward/centered_abs_mean": 0.111602783203125,
"signal/accuracy_reward/group_bin_occupancy": 0.17734375,
"signal/accuracy_reward/group_std_mean": 0.14663170725107194,
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0558013916015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0558013916015625,
"signal/advantage_abs_mean": 0.07775494158267975,
"signal/advantage_pre_scale_abs_mean": 0.07775494158267975,
"signal/advantage_pre_scale_std": 0.11521324068307877,
"signal/advantage_std": 0.11521324068307877,
"signal/brier_reward/centered_abs_mean": 0.1291389599442482,
"signal/brier_reward/group_bin_occupancy": 0.85390625,
"signal/brier_reward/group_std_mean": 0.16580995321273803,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016142369993031026,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016142369993031026,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027791480533778667,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73203125,
"signal/frontier_aurc_reward/group_std_mean": 0.004389007203280925,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.974674666300416e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.974674666300416e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1781601697206497,
"signal/frontier_coverage_1/group_bin_occupancy": 0.873828125,
"signal/frontier_coverage_1/group_std_mean": 0.22925682067871095,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003189067030325532,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003189067030325532,
"signal/frontier_coverage_10/centered_abs_mean": 0.1781599998474121,
"signal/frontier_coverage_10/group_bin_occupancy": 0.873828125,
"signal/frontier_coverage_10/group_std_mean": 0.22925659418106079,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003189063956961036,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003189063956961036,
"signal/frontier_coverage_15/centered_abs_mean": 0.1781597375869751,
"signal/frontier_coverage_15/group_bin_occupancy": 0.873828125,
"signal/frontier_coverage_15/group_std_mean": 0.22925626039505004,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031890592537820337,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031890592537820337,
"signal/frontier_coverage_20/centered_abs_mean": 0.17814434170722962,
"signal/frontier_coverage_20/group_bin_occupancy": 0.873828125,
"signal/frontier_coverage_20/group_std_mean": 0.22923634946346283,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031887838151305912,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031887838151305912,
"signal/frontier_coverage_25/centered_abs_mean": 0.1761375993490219,
"signal/frontier_coverage_25/group_bin_occupancy": 0.871875,
"signal/frontier_coverage_25/group_std_mean": 0.2266537368297577,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0031528628896921873,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0031528628896921873,
"signal/frontier_coverage_5/centered_abs_mean": 0.1781601697206497,
"signal/frontier_coverage_5/group_bin_occupancy": 0.873828125,
"signal/frontier_coverage_5/group_std_mean": 0.22925682067871095,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003189067030325532,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003189067030325532,
"signal/frontier_ece_reward/centered_abs_mean": 0.016893037036061286,
"signal/frontier_ece_reward/group_bin_occupancy": 0.90234375,
"signal/frontier_ece_reward/group_std_mean": 0.02187432684004307,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021116296295076607,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021116296295076607,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2685338854789734,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7390625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3456457793712616,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033566735684871674,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033566735684871674,
"step": 135
},
{
"calibration/aurc": 0.28285148846655567,
"calibration/batch_distribution_entropy": 0.9756966652094482,
"calibration/batch_entropy_100bins": 0.9671782953324402,
"calibration/batch_entropy_10bins": 0.9756966652094482,
"calibration/batch_entropy_50bins": 0.974769557863015,
"calibration/batch_uniqueness": 0.9531880721897764,
"calibration/buffer_distribution_entropy": 0.9993609676432893,
"calibration/buffer_entropy_100bins": 0.9991891999147186,
"calibration/buffer_entropy_10bins": 0.9993609676432893,
"calibration/buffer_entropy_50bins": 0.999319530042054,
"calibration/confidence_entropy": 0.5029605620968143,
"calibration/coverage@0%": 0.025391389432485322,
"calibration/coverage@1%": 0.025391389432485322,
"calibration/coverage@10%": 0.08907167318982387,
"calibration/coverage@15%": 0.15821459148727984,
"calibration/coverage@20%": 0.23907473091976517,
"calibration/coverage@25%": 0.3297272504892368,
"calibration/coverage@30%": 0.5098833476027397,
"calibration/coverage@5%": 0.04375076443248532,
"calibration/ece": 0.11815471566014837,
"calibration/mean_confidence": 0.5428597595536488,
"calibration/prompt_uniqueness": 0.867127087262617,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 1115.8,
"completions/max_terminated_length": 790.0,
"completions/mean_length": 203.92919921875,
"completions/mean_terminated_length": 203.5388946533203,
"completions/min_length": 105.0,
"completions/min_terminated_length": 105.0,
"epoch": 0.448,
"grad_norm": 0.0009459942230023444,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 469813344.0,
"reward": 0.8468173861503601,
"reward_std": 0.09660987108945847,
"rewards/accuracy_reward": 0.51064453125,
"rewards/brier_reward": 0.7961806297302246,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002907540462911129,
"rewards/frontier_coverage_1": 0.12633958905935289,
"rewards/frontier_coverage_10": 0.12633958905935289,
"rewards/frontier_coverage_15": 0.12633958905935289,
"rewards/frontier_coverage_20": 0.1263102501630783,
"rewards/frontier_coverage_25": 0.1250537723302841,
"rewards/frontier_coverage_5": 0.12633958905935289,
"rewards/frontier_ece_reward": 0.010841607302427291,
"rewards/frontier_entropy_batch_reward": -0.1818355828523636,
"signal/accuracy_reward/centered_abs_mean": 0.104315185546875,
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
"signal/accuracy_reward/group_std_mean": 0.1402893543243408,
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0521575927734375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0521575927734375,
"signal/advantage_abs_mean": 0.07476266324520112,
"signal/advantage_pre_scale_abs_mean": 0.07476266324520112,
"signal/advantage_pre_scale_std": 0.11200970113277435,
"signal/advantage_std": 0.11200970113277435,
"signal/brier_reward/centered_abs_mean": 0.12935363054275512,
"signal/brier_reward/group_bin_occupancy": 0.855078125,
"signal/brier_reward/group_std_mean": 0.16653329730033875,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01616920381784439,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01616920381784439,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002982544107362628,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.708984375,
"signal/frontier_aurc_reward/group_std_mean": 0.004976610559970141,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.3387537627713753e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.3387537627713753e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17772595584392548,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_1/group_std_mean": 0.22692298889160156,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031812945380806923,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031812945380806923,
"signal/frontier_coverage_10/centered_abs_mean": 0.17772595584392548,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_10/group_std_mean": 0.22692298889160156,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031812945380806923,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031812945380806923,
"signal/frontier_coverage_15/centered_abs_mean": 0.17772595584392548,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_15/group_std_mean": 0.22692298889160156,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031812945380806923,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031812945380806923,
"signal/frontier_coverage_20/centered_abs_mean": 0.1776350975036621,
"signal/frontier_coverage_20/group_bin_occupancy": 0.876171875,
"signal/frontier_coverage_20/group_std_mean": 0.22681189179420472,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031796682626008986,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031796682626008986,
"signal/frontier_coverage_25/centered_abs_mean": 0.17426885068416595,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_25/group_std_mean": 0.22260749340057373,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.003119412390515208,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003119412390515208,
"signal/frontier_coverage_5/centered_abs_mean": 0.17772595584392548,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_5/group_std_mean": 0.22692298889160156,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031812945380806923,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031812945380806923,
"signal/frontier_ece_reward/centered_abs_mean": 0.015317396074533463,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89140625,
"signal/frontier_ece_reward/group_std_mean": 0.0201519463211298,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001914674509316683,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001914674509316683,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2546141266822815,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.742578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.32758485078811644,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03182676583528519,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03182676583528519,
"step": 140
},
{
"calibration/aurc": 0.3984116060553997,
"calibration/batch_distribution_entropy": 0.9822031101518748,
"calibration/batch_entropy_100bins": 0.9714458118149061,
"calibration/batch_entropy_10bins": 0.9822031101518748,
"calibration/batch_entropy_50bins": 0.9794551763094261,
"calibration/batch_uniqueness": 0.9540252685546875,
"calibration/buffer_distribution_entropy": 0.9991268812264866,
"calibration/buffer_entropy_100bins": 0.9990936316751196,
"calibration/buffer_entropy_10bins": 0.9991268812264866,
"calibration/buffer_entropy_50bins": 0.9991993587888806,
"calibration/confidence_entropy": 0.5101373667560415,
"calibration/coverage@0%": 0.01171875,
"calibration/coverage@1%": 0.01171875,
"calibration/coverage@10%": 0.015625,
"calibration/coverage@15%": 0.019140625,
"calibration/coverage@20%": 0.1140625,
"calibration/coverage@25%": 0.18828125,
"calibration/coverage@30%": 0.283203125,
"calibration/coverage@5%": 0.01171875,
"calibration/ece": 0.12294057415090998,
"calibration/mean_confidence": 0.4963258028783944,
"calibration/prompt_uniqueness": 0.85458984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 1060.8,
"completions/max_terminated_length": 660.8,
"completions/mean_length": 204.21494140625,
"completions/mean_terminated_length": 203.95458374023437,
"completions/min_length": 107.8,
"completions/min_terminated_length": 107.8,
"epoch": 0.464,
"grad_norm": 0.0008858161745592952,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 487075321.0,
"reward": 0.8189481854438782,
"reward_std": 0.09576210975646973,
"rewards/accuracy_reward": 0.4634765625,
"rewards/brier_reward": 0.7746347069740296,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003687644097954035,
"rewards/frontier_coverage_1": 0.14042913317680358,
"rewards/frontier_coverage_10": 0.14042913317680358,
"rewards/frontier_coverage_15": 0.14042913317680358,
"rewards/frontier_coverage_20": 0.1404130145907402,
"rewards/frontier_coverage_25": 0.13810611069202422,
"rewards/frontier_coverage_5": 0.14042913317680358,
"rewards/frontier_ece_reward": 0.008142163883894682,
"rewards/frontier_entropy_batch_reward": -0.20411013662815095,
"signal/accuracy_reward/centered_abs_mean": 0.09339599609375,
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
"signal/accuracy_reward/group_std_mean": 0.13170869201421737,
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.046697998046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.046697998046875,
"signal/advantage_abs_mean": 0.07327512502670289,
"signal/advantage_pre_scale_abs_mean": 0.07327512502670289,
"signal/advantage_pre_scale_std": 0.11063500344753266,
"signal/advantage_std": 0.11063500344753266,
"signal/brier_reward/centered_abs_mean": 0.1346296638250351,
"signal/brier_reward/group_bin_occupancy": 0.84765625,
"signal/brier_reward/group_std_mean": 0.17363184988498687,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01682870797812939,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01682870797812939,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0032396471593528985,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71796875,
"signal/frontier_aurc_reward/group_std_mean": 0.005168183147907257,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.798968268209137e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.798968268209137e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17095789611339568,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_1/group_std_mean": 0.22138096988201142,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00306014628149569,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00306014628149569,
"signal/frontier_coverage_10/centered_abs_mean": 0.17095789611339568,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_10/group_std_mean": 0.22138096988201142,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00306014628149569,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00306014628149569,
"signal/frontier_coverage_15/centered_abs_mean": 0.17095789611339568,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_15/group_std_mean": 0.22138096988201142,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00306014628149569,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00306014628149569,
"signal/frontier_coverage_20/centered_abs_mean": 0.17093894481658936,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_20/group_std_mean": 0.2213562995195389,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030598069541156294,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030598069541156294,
"signal/frontier_coverage_25/centered_abs_mean": 0.16712769567966462,
"signal/frontier_coverage_25/group_bin_occupancy": 0.878125,
"signal/frontier_coverage_25/group_std_mean": 0.2164506733417511,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0029915857128798963,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0029915857128798963,
"signal/frontier_coverage_5/centered_abs_mean": 0.17095789611339568,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_5/group_std_mean": 0.22138096988201142,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00306014628149569,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00306014628149569,
"signal/frontier_ece_reward/centered_abs_mean": 0.0138343783095479,
"signal/frontier_ece_reward/group_bin_occupancy": 0.886328125,
"signal/frontier_ece_reward/group_std_mean": 0.01835048608481884,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0017292972886934876,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0017292972886934876,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27222808003425597,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34940491914749144,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.034028510004281996,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034028510004281996,
"step": 145
},
{
"calibration/aurc": 0.2832759600511364,
"calibration/batch_distribution_entropy": 0.9715486304314798,
"calibration/batch_entropy_100bins": 0.9645395963303353,
"calibration/batch_entropy_10bins": 0.9715486304314798,
"calibration/batch_entropy_50bins": 0.9721231779983697,
"calibration/batch_uniqueness": 0.9513397216796875,
"calibration/buffer_distribution_entropy": 0.9989767926105942,
"calibration/buffer_entropy_100bins": 0.9990000341778547,
"calibration/buffer_entropy_10bins": 0.9989767926105942,
"calibration/buffer_entropy_50bins": 0.9990904829689274,
"calibration/confidence_entropy": 0.480528899956653,
"calibration/coverage@0%": 0.009765625,
"calibration/coverage@1%": 0.009765625,
"calibration/coverage@10%": 0.134375,
"calibration/coverage@15%": 0.225,
"calibration/coverage@20%": 0.310546875,
"calibration/coverage@25%": 0.396484375,
"calibration/coverage@30%": 0.5953125,
"calibration/coverage@5%": 0.026953125,
"calibration/ece": 0.14768165181713427,
"calibration/mean_confidence": 0.49541220339789815,
"calibration/prompt_uniqueness": 0.85048828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 964.8,
"completions/max_terminated_length": 635.2,
"completions/mean_length": 199.683984375,
"completions/mean_terminated_length": 199.29322509765626,
"completions/min_length": 102.6,
"completions/min_terminated_length": 102.6,
"epoch": 0.48,
"grad_norm": 0.00261081475764513,
"learning_rate": 1e-06,
"loss": 0.001,
"num_tokens": 504168117.0,
"reward": 0.8438061833381653,
"reward_std": 0.09672794342041016,
"rewards/accuracy_reward": 0.51884765625,
"rewards/brier_reward": 0.7764919996261597,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.003090168023481965,
"rewards/frontier_coverage_1": 0.11306091845035553,
"rewards/frontier_coverage_10": 0.11306091845035553,
"rewards/frontier_coverage_15": 0.11306091845035553,
"rewards/frontier_coverage_20": 0.11289113312959671,
"rewards/frontier_coverage_25": 0.11131031811237335,
"rewards/frontier_coverage_5": 0.11306091845035553,
"rewards/frontier_ece_reward": 0.008230427093803883,
"rewards/frontier_entropy_batch_reward": -0.20491617918014526,
"signal/accuracy_reward/centered_abs_mean": 0.117669677734375,
"signal/accuracy_reward/group_bin_occupancy": 0.1796875,
"signal/accuracy_reward/group_std_mean": 0.15455419719219207,
"signal/accuracy_reward/group_zero_std_frac": 0.5625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0588348388671875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0588348388671875,
"signal/advantage_abs_mean": 0.07520890831947327,
"signal/advantage_pre_scale_abs_mean": 0.07520890831947327,
"signal/advantage_pre_scale_std": 0.11293750852346421,
"signal/advantage_std": 0.11293750852346421,
"signal/brier_reward/centered_abs_mean": 0.1358100563287735,
"signal/brier_reward/group_bin_occupancy": 0.8375,
"signal/brier_reward/group_std_mean": 0.1739418923854828,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016976257041096687,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016976257041096687,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029037161730229855,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7359375,
"signal/frontier_aurc_reward/group_std_mean": 0.0047456233762204645,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.1976518443552776e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.1976518443552776e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.19191361963748932,
"signal/frontier_coverage_1/group_bin_occupancy": 0.855859375,
"signal/frontier_coverage_1/group_std_mean": 0.24454809129238128,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034352536778897045,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034352536778897045,
"signal/frontier_coverage_10/centered_abs_mean": 0.19191361963748932,
"signal/frontier_coverage_10/group_bin_occupancy": 0.855859375,
"signal/frontier_coverage_10/group_std_mean": 0.24454809129238128,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034352536778897045,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034352536778897045,
"signal/frontier_coverage_15/centered_abs_mean": 0.19191361963748932,
"signal/frontier_coverage_15/group_bin_occupancy": 0.855859375,
"signal/frontier_coverage_15/group_std_mean": 0.24454809129238128,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034352536778897045,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034352536778897045,
"signal/frontier_coverage_20/centered_abs_mean": 0.19139576852321624,
"signal/frontier_coverage_20/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_20/group_std_mean": 0.24390313625335694,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003425984038040042,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003425984038040042,
"signal/frontier_coverage_25/centered_abs_mean": 0.18600209653377534,
"signal/frontier_coverage_25/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_25/group_std_mean": 0.23725315928459167,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0033294373657554387,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033294373657554387,
"signal/frontier_coverage_5/centered_abs_mean": 0.19191361963748932,
"signal/frontier_coverage_5/group_bin_occupancy": 0.855859375,
"signal/frontier_coverage_5/group_std_mean": 0.24454809129238128,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034352536778897045,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034352536778897045,
"signal/frontier_ece_reward/centered_abs_mean": 0.013514818623661995,
"signal/frontier_ece_reward/group_bin_occupancy": 0.901171875,
"signal/frontier_ece_reward/group_std_mean": 0.017669208720326422,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0016893523279577494,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0016893523279577494,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26671458780765533,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.743359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34039146900177003,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03333932347595692,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333932347595692,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.5405549587051462,
"eval_calibration/batch_distribution_entropy": 0.9065406036093964,
"eval_calibration/batch_entropy_100bins": 0.6972847745692209,
"eval_calibration/batch_entropy_10bins": 0.9065406036093964,
"eval_calibration/batch_entropy_50bins": 0.7754914476517584,
"eval_calibration/batch_uniqueness": 0.89453125,
"eval_calibration/buffer_distribution_entropy": 0.9988051667639347,
"eval_calibration/buffer_entropy_100bins": 0.9988938324367392,
"eval_calibration/buffer_entropy_10bins": 0.9988051667639347,
"eval_calibration/buffer_entropy_50bins": 0.9989811313318033,
"eval_calibration/confidence_entropy": 0.48391809012155884,
"eval_calibration/coverage@0%": 0.0390625,
"eval_calibration/coverage@1%": 0.0390625,
"eval_calibration/coverage@10%": 0.0390625,
"eval_calibration/coverage@15%": 0.0390625,
"eval_calibration/coverage@20%": 0.046875,
"eval_calibration/coverage@25%": 0.046875,
"eval_calibration/coverage@30%": 0.046875,
"eval_calibration/coverage@5%": 0.0390625,
"eval_calibration/ece": 0.2232385876510525,
"eval_calibration/mean_confidence": 0.4669556942027052,
"eval_calibration/prompt_uniqueness": 0.89453125,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 387.0,
"eval_completions/max_terminated_length": 387.0,
"eval_completions/mean_length": 198.34226989746094,
"eval_completions/mean_terminated_length": 198.34226989746094,
"eval_completions/min_length": 125.0,
"eval_completions/min_terminated_length": 125.0,
"eval_loss": 0.0,
"eval_num_tokens": 504168117.0,
"eval_reward": 0.699365958571434,
"eval_reward_std": 0.21834751963615417,
"eval_rewards/accuracy_reward": 0.412109375,
"eval_rewards/brier_reward": 0.7784133553504944,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.004543848393950611,
"eval_rewards/frontier_coverage_1": 0.18927186354994774,
"eval_rewards/frontier_coverage_10": 0.18927159160375595,
"eval_rewards/frontier_coverage_15": 0.18926333636045456,
"eval_rewards/frontier_coverage_20": 0.18883728608489037,
"eval_rewards/frontier_coverage_25": 0.17392469197511673,
"eval_rewards/frontier_coverage_5": 0.18927186354994774,
"eval_rewards/frontier_ece_reward": 0.00836634065490216,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 19.6978,
"eval_samples_per_second": 25.384,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4649658203125,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4890812262892723,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23248291015625,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23248291015625,
"eval_signal/advantage_abs_mean": 0.19869232177734375,
"eval_signal/advantage_pre_scale_abs_mean": 0.19869232177734375,
"eval_signal/advantage_pre_scale_std": 0.2162884622812271,
"eval_signal/advantage_std": 0.2162884622812271,
"eval_signal/brier_reward/centered_abs_mean": 0.20233283191919327,
"eval_signal/brier_reward/group_bin_occupancy": 0.9140625,
"eval_signal/brier_reward/group_std_mean": 0.2505420297384262,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02529160398989916,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02529160398989916,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005963263858575374,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.78125,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.010063497698865831,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 0.00010674241821106989,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 0.00010674241821106989,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36536306887865067,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4483560249209404,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00653999880887568,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00653999880887568,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3653620555996895,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4483548328280449,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0065399802988395095,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0065399802988395095,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.365331307053566,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_15/group_std_mean": 0.448319248855114,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0065394300036132336,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0065394300036132336,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.36427226662635803,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4470879137516022,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006520473049022257,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.006520473049022257,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.33615638315677643,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_25/group_std_mean": 0.4133630245923996,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.006017199018970132,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.006017199018970132,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.36536306887865067,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.9765625,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4483560249209404,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00653999880887568,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00653999880887568,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.01719106500968337,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.022434783168137074,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0021488831262104213,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0021488831262104213,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.203,
"step": 150
},
{
"calibration/aurc": 0.3869300543313942,
"calibration/batch_distribution_entropy": 0.9808689690281369,
"calibration/batch_entropy_100bins": 0.9674042155001947,
"calibration/batch_entropy_10bins": 0.9808689690281369,
"calibration/batch_entropy_50bins": 0.976446592128552,
"calibration/batch_uniqueness": 0.9534576416015625,
"calibration/buffer_distribution_entropy": 0.9986828299800502,
"calibration/buffer_entropy_100bins": 0.9988154597442325,
"calibration/buffer_entropy_10bins": 0.9986828299800502,
"calibration/buffer_entropy_50bins": 0.9988999650475596,
"calibration/confidence_entropy": 0.4790370333140571,
"calibration/coverage@0%": 0.023828125,
"calibration/coverage@1%": 0.023828125,
"calibration/coverage@10%": 0.09765625,
"calibration/coverage@15%": 0.1453125,
"calibration/coverage@20%": 0.20390625,
"calibration/coverage@25%": 0.250390625,
"calibration/coverage@30%": 0.290625,
"calibration/coverage@5%": 0.053515625,
"calibration/ece": 0.13844372663002122,
"calibration/mean_confidence": 0.5048848300838907,
"calibration/prompt_uniqueness": 0.851220703125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 460.8,
"completions/max_terminated_length": 460.8,
"completions/mean_length": 195.812890625,
"completions/mean_terminated_length": 195.812890625,
"completions/min_length": 98.6,
"completions/min_terminated_length": 98.6,
"epoch": 0.496,
"grad_norm": 0.0008509118924848735,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 521481081.0,
"reward": 0.8582545042037963,
"reward_std": 0.09380114525556564,
"rewards/accuracy_reward": 0.5439453125,
"rewards/brier_reward": 0.7857403755187988,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.003081470658071339,
"rewards/frontier_coverage_1": 0.10034325867891311,
"rewards/frontier_coverage_10": 0.10034310221672058,
"rewards/frontier_coverage_15": 0.10032327324151993,
"rewards/frontier_coverage_20": 0.10001767575740814,
"rewards/frontier_coverage_25": 0.09081147015094757,
"rewards/frontier_coverage_5": 0.10034325867891311,
"rewards/frontier_ece_reward": 0.007945819105952979,
"rewards/frontier_entropy_batch_reward": -0.18739983737468718,
"signal/accuracy_reward/centered_abs_mean": 0.0947265625,
"signal/accuracy_reward/group_bin_occupancy": 0.17421875,
"signal/accuracy_reward/group_std_mean": 0.13038647025823594,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04736328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04736328125,
"signal/advantage_abs_mean": 0.07237804681062698,
"signal/advantage_pre_scale_abs_mean": 0.07237804681062698,
"signal/advantage_pre_scale_std": 0.10942972600460052,
"signal/advantage_std": 0.10942972600460052,
"signal/brier_reward/centered_abs_mean": 0.12652941197156906,
"signal/brier_reward/group_bin_occupancy": 0.849609375,
"signal/brier_reward/group_std_mean": 0.1622232437133789,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015816176496446132,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015816176496446132,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.003096145251765847,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.718359375,
"signal/frontier_aurc_reward/group_std_mean": 0.005016565602272749,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.542099897866137e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.542099897866137e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1638656437397003,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_1/group_std_mean": 0.21250625550746918,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002933195047080517,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002933195047080517,
"signal/frontier_coverage_10/centered_abs_mean": 0.16386164724826813,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_10/group_std_mean": 0.21250071823596955,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029331233818084,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029331233818084,
"signal/frontier_coverage_15/centered_abs_mean": 0.16379604637622833,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_15/group_std_mean": 0.21241317689418793,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029319490771740676,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029319490771740676,
"signal/frontier_coverage_20/centered_abs_mean": 0.1630973845720291,
"signal/frontier_coverage_20/group_bin_occupancy": 0.867578125,
"signal/frontier_coverage_20/group_std_mean": 0.2114973783493042,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002919443091377616,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002919443091377616,
"signal/frontier_coverage_25/centered_abs_mean": 0.1438317209482193,
"signal/frontier_coverage_25/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_25/group_std_mean": 0.18710974752902984,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00257458770647645,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00257458770647645,
"signal/frontier_coverage_5/centered_abs_mean": 0.1638656437397003,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_5/group_std_mean": 0.21250625550746918,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002933195047080517,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002933195047080517,
"signal/frontier_ece_reward/centered_abs_mean": 0.011986837163567543,
"signal/frontier_ece_reward/group_bin_occupancy": 0.898046875,
"signal/frontier_ece_reward/group_std_mean": 0.015694568678736687,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001498354645445943,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001498354645445943,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26680760979652407,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34285420179367065,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03335095122456551,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03335095122456551,
"step": 155
},
{
"calibration/aurc": 0.31852894466439724,
"calibration/batch_distribution_entropy": 0.9846605362740333,
"calibration/batch_entropy_100bins": 0.9731097753973357,
"calibration/batch_entropy_10bins": 0.9846605362740333,
"calibration/batch_entropy_50bins": 0.9797415936230249,
"calibration/batch_uniqueness": 0.954290771484375,
"calibration/buffer_distribution_entropy": 0.9985237641268538,
"calibration/buffer_entropy_100bins": 0.9987331459546469,
"calibration/buffer_entropy_10bins": 0.9985237641268538,
"calibration/buffer_entropy_50bins": 0.9987960418931671,
"calibration/confidence_entropy": 0.5095141511134974,
"calibration/coverage@0%": 0.012890625,
"calibration/coverage@1%": 0.012890625,
"calibration/coverage@10%": 0.176953125,
"calibration/coverage@15%": 0.29765625,
"calibration/coverage@20%": 0.383203125,
"calibration/coverage@25%": 0.45546875,
"calibration/coverage@30%": 0.49453125,
"calibration/coverage@5%": 0.069140625,
"calibration/ece": 0.1362378664495895,
"calibration/mean_confidence": 0.5084870717219879,
"calibration/prompt_uniqueness": 0.86162109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 896.2,
"completions/max_terminated_length": 459.0,
"completions/mean_length": 189.36025390625,
"completions/mean_terminated_length": 189.0965362548828,
"completions/min_length": 101.0,
"completions/min_terminated_length": 101.0,
"epoch": 0.512,
"grad_norm": 0.00109315593726933,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 538565794.0,
"reward": 0.8589414358139038,
"reward_std": 0.09629883468151093,
"rewards/accuracy_reward": 0.5396484375,
"rewards/brier_reward": 0.8017237544059753,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002702287444844842,
"rewards/frontier_coverage_1": 0.11047709956765175,
"rewards/frontier_coverage_10": 0.11047184318304062,
"rewards/frontier_coverage_15": 0.11041708588600159,
"rewards/frontier_coverage_20": 0.10978015959262848,
"rewards/frontier_coverage_25": 0.09341206625103951,
"rewards/frontier_coverage_5": 0.11047709956765175,
"rewards/frontier_ece_reward": 0.008117536641657352,
"rewards/frontier_entropy_batch_reward": -0.18771363496780397,
"signal/accuracy_reward/centered_abs_mean": 0.1011474609375,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.13669176101684571,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05057373046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05057373046875,
"signal/advantage_abs_mean": 0.07451938837766647,
"signal/advantage_pre_scale_abs_mean": 0.07451938837766647,
"signal/advantage_pre_scale_std": 0.11410035341978073,
"signal/advantage_std": 0.11410035341978073,
"signal/brier_reward/centered_abs_mean": 0.12140367329120635,
"signal/brier_reward/group_bin_occupancy": 0.844921875,
"signal/brier_reward/group_std_mean": 0.15884515047073364,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015175459161400794,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015175459161400794,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002908071083948016,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.707421875,
"signal/frontier_aurc_reward/group_std_mean": 0.00480275945737958,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.205447159823961e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.205447159823961e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15683222711086273,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_1/group_std_mean": 0.20588673055171966,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028072968125343323,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028072968125343323,
"signal/frontier_coverage_10/centered_abs_mean": 0.15682255029678344,
"signal/frontier_coverage_10/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_10/group_std_mean": 0.20587407648563386,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0028071236796677113,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028071236796677113,
"signal/frontier_coverage_15/centered_abs_mean": 0.15671851933002473,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_15/group_std_mean": 0.20573811531066893,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002805261267349124,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002805261267349124,
"signal/frontier_coverage_20/centered_abs_mean": 0.15555653274059295,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_20/group_std_mean": 0.20422441959381105,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027844619005918505,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027844619005918505,
"signal/frontier_coverage_25/centered_abs_mean": 0.1274886041879654,
"signal/frontier_coverage_25/group_bin_occupancy": 0.85703125,
"signal/frontier_coverage_25/group_std_mean": 0.16840324103832244,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022820457816123962,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022820457816123962,
"signal/frontier_coverage_5/centered_abs_mean": 0.15683222711086273,
"signal/frontier_coverage_5/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_5/group_std_mean": 0.20588673055171966,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028072968125343323,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028072968125343323,
"signal/frontier_ece_reward/centered_abs_mean": 0.010539719834923744,
"signal/frontier_ece_reward/group_bin_occupancy": 0.899609375,
"signal/frontier_ece_reward/group_std_mean": 0.013909543678164483,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001317464979365468,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001317464979365468,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2608456969261169,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7421875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33714223504066465,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032605712115764615,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032605712115764615,
"step": 160
},
{
"calibration/aurc": 0.21917538463284222,
"calibration/batch_distribution_entropy": 0.9878817709654515,
"calibration/batch_entropy_100bins": 0.9757889859436031,
"calibration/batch_entropy_10bins": 0.9878817709654515,
"calibration/batch_entropy_50bins": 0.983224028911913,
"calibration/batch_uniqueness": 0.9545867919921875,
"calibration/buffer_distribution_entropy": 0.9985031862617003,
"calibration/buffer_entropy_100bins": 0.9987566386866493,
"calibration/buffer_entropy_10bins": 0.9985031862617003,
"calibration/buffer_entropy_50bins": 0.9987999633528707,
"calibration/confidence_entropy": 0.4950461976985136,
"calibration/coverage@0%": 0.0078125,
"calibration/coverage@1%": 0.0078125,
"calibration/coverage@10%": 0.25234375,
"calibration/coverage@15%": 0.36328125,
"calibration/coverage@20%": 0.51875,
"calibration/coverage@25%": 0.65703125,
"calibration/coverage@30%": 0.7484375,
"calibration/coverage@5%": 0.141796875,
"calibration/ece": 0.12159026779133557,
"calibration/mean_confidence": 0.5102481512964239,
"calibration/prompt_uniqueness": 0.847412109375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 686.2,
"completions/max_terminated_length": 686.2,
"completions/mean_length": 187.4068359375,
"completions/mean_terminated_length": 187.4068359375,
"completions/min_length": 93.4,
"completions/min_terminated_length": 93.4,
"epoch": 0.528,
"grad_norm": 0.0009646462858654559,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 555514376.0,
"reward": 0.8567448496818543,
"reward_std": 0.0943350225687027,
"rewards/accuracy_reward": 0.53310546875,
"rewards/brier_reward": 0.8055103659629822,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0025082074804231524,
"rewards/frontier_coverage_1": 0.1262974977493286,
"rewards/frontier_coverage_10": 0.12625774666666983,
"rewards/frontier_coverage_15": 0.1262197308242321,
"rewards/frontier_coverage_20": 0.12498710155487061,
"rewards/frontier_coverage_25": 0.09902235716581345,
"rewards/frontier_coverage_5": 0.1262974977493286,
"rewards/frontier_ece_reward": 0.006973131839185953,
"rewards/frontier_entropy_batch_reward": -0.19499198198318482,
"signal/accuracy_reward/centered_abs_mean": 0.108697509765625,
"signal/accuracy_reward/group_bin_occupancy": 0.175,
"signal/accuracy_reward/group_std_mean": 0.1418474718928337,
"signal/accuracy_reward/group_zero_std_frac": 0.6,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0543487548828125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0543487548828125,
"signal/advantage_abs_mean": 0.07427967190742493,
"signal/advantage_pre_scale_abs_mean": 0.07427967190742493,
"signal/advantage_pre_scale_std": 0.1111309289932251,
"signal/advantage_std": 0.1111309289932251,
"signal/brier_reward/centered_abs_mean": 0.11962604224681854,
"signal/brier_reward/group_bin_occupancy": 0.844921875,
"signal/brier_reward/group_std_mean": 0.1540861427783966,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014953255280852317,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014953255280852317,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002598578087054193,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.696484375,
"signal/frontier_aurc_reward/group_std_mean": 0.004507921310141683,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.6514547284459697e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.6514547284459697e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17175144851207733,
"signal/frontier_coverage_1/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_1/group_std_mean": 0.21923006176948548,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030743507202714683,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030743507202714683,
"signal/frontier_coverage_10/centered_abs_mean": 0.1716869741678238,
"signal/frontier_coverage_10/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_10/group_std_mean": 0.21915002167224884,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003073196718469262,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003073196718469262,
"signal/frontier_coverage_15/centered_abs_mean": 0.17162050902843476,
"signal/frontier_coverage_15/group_bin_occupancy": 0.88125,
"signal/frontier_coverage_15/group_std_mean": 0.21906675398349762,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030720070470124485,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030720070470124485,
"signal/frontier_coverage_20/centered_abs_mean": 0.16946081519126893,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88359375,
"signal/frontier_coverage_20/group_std_mean": 0.21636516749858856,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030333484522998334,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030333484522998334,
"signal/frontier_coverage_25/centered_abs_mean": 0.12273151576519012,
"signal/frontier_coverage_25/group_bin_occupancy": 0.880859375,
"signal/frontier_coverage_25/group_std_mean": 0.15797219574451446,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0021968940272927284,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021968940272927284,
"signal/frontier_coverage_5/centered_abs_mean": 0.17175144851207733,
"signal/frontier_coverage_5/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_5/group_std_mean": 0.21923006176948548,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030743507202714683,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030743507202714683,
"signal/frontier_ece_reward/centered_abs_mean": 0.009134939312934876,
"signal/frontier_ece_reward/group_bin_occupancy": 0.883203125,
"signal/frontier_ece_reward/group_std_mean": 0.012016034871339797,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011418674141168595,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011418674141168595,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2628565192222595,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.728515625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3387132942676544,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03285706490278244,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03285706490278244,
"step": 165
},
{
"calibration/aurc": 0.2330965079108403,
"calibration/batch_distribution_entropy": 0.9783253458334003,
"calibration/batch_entropy_100bins": 0.967895082463165,
"calibration/batch_entropy_10bins": 0.9783253458334003,
"calibration/batch_entropy_50bins": 0.9760524996239462,
"calibration/batch_uniqueness": 0.9533475407324336,
"calibration/buffer_distribution_entropy": 0.9984517660348228,
"calibration/buffer_entropy_100bins": 0.9987360351808527,
"calibration/buffer_entropy_10bins": 0.9984517660348228,
"calibration/buffer_entropy_50bins": 0.9987687423048592,
"calibration/confidence_entropy": 0.47645242697701995,
"calibration/coverage@0%": 0.038713154354207434,
"calibration/coverage@1%": 0.038713154354207434,
"calibration/coverage@10%": 0.13606210249510764,
"calibration/coverage@15%": 0.2814059442270059,
"calibration/coverage@20%": 0.48266802226027394,
"calibration/coverage@25%": 0.6222021771037183,
"calibration/coverage@30%": 0.7140311582681018,
"calibration/coverage@5%": 0.07820908757338552,
"calibration/ece": 0.09310715311335953,
"calibration/mean_confidence": 0.5309102969282397,
"calibration/prompt_uniqueness": 0.8458068276047086,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 845.2,
"completions/max_terminated_length": 624.6,
"completions/mean_length": 188.4822265625,
"completions/mean_terminated_length": 188.35069885253907,
"completions/min_length": 99.4,
"completions/min_terminated_length": 99.4,
"epoch": 0.544,
"grad_norm": 0.0012510113883763552,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 572608018.0,
"reward": 0.8668020844459534,
"reward_std": 0.0995595932006836,
"rewards/accuracy_reward": 0.571484375,
"rewards/brier_reward": 0.787188982963562,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002780396491289139,
"rewards/frontier_coverage_1": 0.07695508673787117,
"rewards/frontier_coverage_10": 0.07694511339068413,
"rewards/frontier_coverage_15": 0.07687427774071694,
"rewards/frontier_coverage_20": 0.07620680481195449,
"rewards/frontier_coverage_25": 0.05764272883534431,
"rewards/frontier_coverage_5": 0.07695508673787117,
"rewards/frontier_ece_reward": 0.005536333145573735,
"rewards/frontier_entropy_batch_reward": -0.2059100717306137,
"signal/accuracy_reward/centered_abs_mean": 0.1152587890625,
"signal/accuracy_reward/group_bin_occupancy": 0.178515625,
"signal/accuracy_reward/group_std_mean": 0.15163064002990723,
"signal/accuracy_reward/group_zero_std_frac": 0.571875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05762939453125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05762939453125,
"signal/advantage_abs_mean": 0.0772314801812172,
"signal/advantage_pre_scale_abs_mean": 0.0772314801812172,
"signal/advantage_pre_scale_std": 0.11531815230846405,
"signal/advantage_std": 0.11531815230846405,
"signal/brier_reward/centered_abs_mean": 0.13076411485671996,
"signal/brier_reward/group_bin_occupancy": 0.857421875,
"signal/brier_reward/group_std_mean": 0.1675712913274765,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016345514357089995,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.016345514357089995,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002782534621655941,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.716796875,
"signal/frontier_aurc_reward/group_std_mean": 0.004524756595492363,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9807369941845535e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9807369941845535e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.17586564421653747,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_1/group_std_mean": 0.22499242424964905,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0031479948200285436,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0031479948200285436,
"signal/frontier_coverage_10/centered_abs_mean": 0.1757751613855362,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_10/group_std_mean": 0.2248790979385376,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0031463753432035444,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0031463753432035444,
"signal/frontier_coverage_15/centered_abs_mean": 0.17549155354499818,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_15/group_std_mean": 0.22451838254928588,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0031412987038493155,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0031412987038493155,
"signal/frontier_coverage_20/centered_abs_mean": 0.1719941407442093,
"signal/frontier_coverage_20/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_20/group_std_mean": 0.2201235145330429,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003078695107251406,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003078695107251406,
"signal/frontier_coverage_25/centered_abs_mean": 0.1134360283613205,
"signal/frontier_coverage_25/group_bin_occupancy": 0.8578125,
"signal/frontier_coverage_25/group_std_mean": 0.14679449796676636,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002030504820868373,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002030504820868373,
"signal/frontier_coverage_5/centered_abs_mean": 0.17586564421653747,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_5/group_std_mean": 0.22499242424964905,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0031479948200285436,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0031479948200285436,
"signal/frontier_ece_reward/centered_abs_mean": 0.009148731268942356,
"signal/frontier_ece_reward/group_bin_occupancy": 0.894140625,
"signal/frontier_ece_reward/group_std_mean": 0.011937451735138892,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0011435914086177946,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0011435914086177946,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2780795097351074,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.744140625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.351616758108139,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03475993871688843,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03475993871688843,
"step": 170
},
{
"calibration/aurc": 0.2589022891995325,
"calibration/batch_distribution_entropy": 0.9807805330573605,
"calibration/batch_entropy_100bins": 0.9685901941968815,
"calibration/batch_entropy_10bins": 0.9807805330573605,
"calibration/batch_entropy_50bins": 0.977349875364441,
"calibration/batch_uniqueness": 0.9529144287109375,
"calibration/buffer_distribution_entropy": 0.9982041576732013,
"calibration/buffer_entropy_100bins": 0.99861205511106,
"calibration/buffer_entropy_10bins": 0.9982041576732013,
"calibration/buffer_entropy_50bins": 0.9986009396983668,
"calibration/confidence_entropy": 0.487472109955439,
"calibration/coverage@0%": 0.082421875,
"calibration/coverage@1%": 0.09140625,
"calibration/coverage@10%": 0.228125,
"calibration/coverage@15%": 0.305859375,
"calibration/coverage@20%": 0.354296875,
"calibration/coverage@25%": 0.4875,
"calibration/coverage@30%": 0.617578125,
"calibration/coverage@5%": 0.18359375,
"calibration/ece": 0.11482706723425573,
"calibration/mean_confidence": 0.5064478468870471,
"calibration/prompt_uniqueness": 0.852490234375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 447.6,
"completions/max_terminated_length": 447.6,
"completions/mean_length": 188.24775390625,
"completions/mean_terminated_length": 188.24775390625,
"completions/min_length": 98.6,
"completions/min_terminated_length": 98.6,
"epoch": 0.56,
"grad_norm": 0.0009781933622434735,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 589357083.0,
"reward": 0.8498636603355407,
"reward_std": 0.09095648676156998,
"rewards/accuracy_reward": 0.526171875,
"rewards/brier_reward": 0.8025665879249573,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0028386770747601984,
"rewards/frontier_coverage_1": 0.11719954907894134,
"rewards/frontier_coverage_10": 0.11714765727519989,
"rewards/frontier_coverage_15": 0.11693512350320816,
"rewards/frontier_coverage_20": 0.11193432807922363,
"rewards/frontier_coverage_25": 0.07785176485776901,
"rewards/frontier_coverage_5": 0.11719954907894134,
"rewards/frontier_ece_reward": 0.005842031445354223,
"rewards/frontier_entropy_batch_reward": -0.2080444574356079,
"signal/accuracy_reward/centered_abs_mean": 0.091064453125,
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
"signal/accuracy_reward/group_std_mean": 0.12434282898902893,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0455322265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0455322265625,
"signal/advantage_abs_mean": 0.07068178355693817,
"signal/advantage_pre_scale_abs_mean": 0.07068178355693817,
"signal/advantage_pre_scale_std": 0.10720473378896714,
"signal/advantage_std": 0.10720473378896714,
"signal/brier_reward/centered_abs_mean": 0.12168123424053193,
"signal/brier_reward/group_bin_occupancy": 0.83828125,
"signal/brier_reward/group_std_mean": 0.1573301523923874,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01521015428006649,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01521015428006649,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028686066623777153,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.716015625,
"signal/frontier_aurc_reward/group_std_mean": 0.004602818004786968,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.134805833222345e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.134805833222345e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1588267892599106,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_1/group_std_mean": 0.2065411925315857,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002842999389395118,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002842999389395118,
"signal/frontier_coverage_10/centered_abs_mean": 0.15874530375003815,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_10/group_std_mean": 0.20643724501132965,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002841540891677141,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002841540891677141,
"signal/frontier_coverage_15/centered_abs_mean": 0.15840073227882384,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_15/group_std_mean": 0.20599766075611115,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002835373068228364,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002835373068228364,
"signal/frontier_coverage_20/centered_abs_mean": 0.14993982166051864,
"signal/frontier_coverage_20/group_bin_occupancy": 0.846484375,
"signal/frontier_coverage_20/group_std_mean": 0.19521004855632781,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026839226484298706,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026839226484298706,
"signal/frontier_coverage_25/centered_abs_mean": 0.09466438889503478,
"signal/frontier_coverage_25/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_25/group_std_mean": 0.12403950989246368,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001694492483511567,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001694492483511567,
"signal/frontier_coverage_5/centered_abs_mean": 0.1588267892599106,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_5/group_std_mean": 0.2065411925315857,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002842999389395118,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002842999389395118,
"signal/frontier_ece_reward/centered_abs_mean": 0.008115557208657264,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8796875,
"signal/frontier_ece_reward/group_std_mean": 0.01066547017544508,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.001014444651082158,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.001014444651082158,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.274140340089798,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.735546875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34681135416030884,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03426754251122475,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03426754251122475,
"step": 175
},
{
"calibration/aurc": 0.28829373194083824,
"calibration/batch_distribution_entropy": 0.9841373980772424,
"calibration/batch_entropy_100bins": 0.9681536817665121,
"calibration/batch_entropy_10bins": 0.9841373980772424,
"calibration/batch_entropy_50bins": 0.9792620166416952,
"calibration/batch_uniqueness": 0.9536224365234375,
"calibration/buffer_distribution_entropy": 0.998297768682672,
"calibration/buffer_entropy_100bins": 0.9986629530424844,
"calibration/buffer_entropy_10bins": 0.998297768682672,
"calibration/buffer_entropy_50bins": 0.9986413654675284,
"calibration/confidence_entropy": 0.48650558811910427,
"calibration/coverage@0%": 0.019140625,
"calibration/coverage@1%": 0.019140625,
"calibration/coverage@10%": 0.109765625,
"calibration/coverage@15%": 0.1859375,
"calibration/coverage@20%": 0.351171875,
"calibration/coverage@25%": 0.4765625,
"calibration/coverage@30%": 0.573828125,
"calibration/coverage@5%": 0.079296875,
"calibration/ece": 0.10345090124198557,
"calibration/mean_confidence": 0.4916340363422075,
"calibration/prompt_uniqueness": 0.84775390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 865.6,
"completions/max_terminated_length": 421.2,
"completions/mean_length": 185.69755859375,
"completions/mean_terminated_length": 185.43428955078124,
"completions/min_length": 93.6,
"completions/min_terminated_length": 93.6,
"epoch": 0.576,
"grad_norm": 0.0009465691982768476,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 606445250.0,
"reward": 0.8475932121276856,
"reward_std": 0.08987626880407333,
"rewards/accuracy_reward": 0.5220703125,
"rewards/brier_reward": 0.7915264964103699,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0030013061594218017,
"rewards/frontier_coverage_1": 0.12116425856947899,
"rewards/frontier_coverage_10": 0.12115439549088478,
"rewards/frontier_coverage_15": 0.12109048813581466,
"rewards/frontier_coverage_20": 0.11619948148727417,
"rewards/frontier_coverage_25": 0.07896296977996826,
"rewards/frontier_coverage_5": 0.12116425856947899,
"rewards/frontier_ece_reward": 0.005212780460715294,
"rewards/frontier_entropy_batch_reward": -0.20040208101272583,
"signal/accuracy_reward/centered_abs_mean": 0.09073486328125,
"signal/accuracy_reward/group_bin_occupancy": 0.172265625,
"signal/accuracy_reward/group_std_mean": 0.12525396645069123,
"signal/accuracy_reward/group_zero_std_frac": 0.621875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045367431640625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045367431640625,
"signal/advantage_abs_mean": 0.06918673142790795,
"signal/advantage_pre_scale_abs_mean": 0.06918673142790795,
"signal/advantage_pre_scale_std": 0.1060228943824768,
"signal/advantage_std": 0.1060228943824768,
"signal/brier_reward/centered_abs_mean": 0.12269987463951111,
"signal/brier_reward/group_bin_occupancy": 0.836328125,
"signal/brier_reward/group_std_mean": 0.158928182721138,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015337484329938889,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.015337484329938889,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0027809354942291975,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.708203125,
"signal/frontier_aurc_reward/group_std_mean": 0.004555220529437065,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.9778743414208296e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.9778743414208296e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16433233320713042,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_1/group_std_mean": 0.21237687766551971,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002941548731178045,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002941548731178045,
"signal/frontier_coverage_10/centered_abs_mean": 0.16424158215522766,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_10/group_std_mean": 0.2122596561908722,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029399242252111436,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029399242252111436,
"signal/frontier_coverage_15/centered_abs_mean": 0.16389403641223907,
"signal/frontier_coverage_15/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_15/group_std_mean": 0.2118108332157135,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002933703176677227,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002933703176677227,
"signal/frontier_coverage_20/centered_abs_mean": 0.15368525087833404,
"signal/frontier_coverage_20/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_20/group_std_mean": 0.19865911304950715,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027509658131748436,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027509658131748436,
"signal/frontier_coverage_25/centered_abs_mean": 0.09549619555473328,
"signal/frontier_coverage_25/group_bin_occupancy": 0.87265625,
"signal/frontier_coverage_25/group_std_mean": 0.12401848435401916,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017093818169087172,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017093818169087172,
"signal/frontier_coverage_5/centered_abs_mean": 0.16433233320713042,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_5/group_std_mean": 0.21237687766551971,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002941548731178045,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002941548731178045,
"signal/frontier_ece_reward/centered_abs_mean": 0.007696983031928539,
"signal/frontier_ece_reward/group_bin_occupancy": 0.880078125,
"signal/frontier_ece_reward/group_std_mean": 0.01010540798306465,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009621228789910674,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009621228789910674,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2615818977355957,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33764955401420593,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03269773721694946,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03269773721694946,
"step": 180
},
{
"calibration/aurc": 0.28347549862131505,
"calibration/batch_distribution_entropy": 0.980011133919852,
"calibration/batch_entropy_100bins": 0.9688122443099946,
"calibration/batch_entropy_10bins": 0.980011133919852,
"calibration/batch_entropy_50bins": 0.9770535189448722,
"calibration/batch_uniqueness": 0.9536908440564685,
"calibration/buffer_distribution_entropy": 0.9983131546794096,
"calibration/buffer_entropy_100bins": 0.9986737401524538,
"calibration/buffer_entropy_10bins": 0.9983131546794096,
"calibration/buffer_entropy_50bins": 0.9986542446646809,
"calibration/confidence_entropy": 0.47803387557048865,
"calibration/coverage@0%": 0.0633347602739726,
"calibration/coverage@1%": 0.0633347602739726,
"calibration/coverage@10%": 0.2251460066046967,
"calibration/coverage@15%": 0.3561093444227006,
"calibration/coverage@20%": 0.48078828277886493,
"calibration/coverage@25%": 0.5538772015655578,
"calibration/coverage@30%": 0.628125,
"calibration/coverage@5%": 0.11884326076320939,
"calibration/ece": 0.12117534786829438,
"calibration/mean_confidence": 0.4956464571020572,
"calibration/prompt_uniqueness": 0.8459324800013007,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 899.6,
"completions/max_terminated_length": 447.2,
"completions/mean_length": 181.71953125,
"completions/mean_terminated_length": 181.45542602539064,
"completions/min_length": 94.0,
"completions/min_terminated_length": 94.0,
"epoch": 0.592,
"grad_norm": 0.0011055340291932225,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 623473770.0,
"reward": 0.8469202160835266,
"reward_std": 0.08769658207893372,
"rewards/accuracy_reward": 0.5228515625,
"rewards/brier_reward": 0.7960270881652832,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.002862662449479103,
"rewards/frontier_coverage_1": 0.1255490630865097,
"rewards/frontier_coverage_10": 0.12550750821828843,
"rewards/frontier_coverage_15": 0.12535116225481033,
"rewards/frontier_coverage_20": 0.11695131063461303,
"rewards/frontier_coverage_25": 0.07410136461257935,
"rewards/frontier_coverage_5": 0.1255490630865097,
"rewards/frontier_ece_reward": 0.005222787708044052,
"rewards/frontier_entropy_batch_reward": -0.2153420329093933,
"signal/accuracy_reward/centered_abs_mean": 0.09036865234375,
"signal/accuracy_reward/group_bin_occupancy": 0.170703125,
"signal/accuracy_reward/group_std_mean": 0.12226001918315887,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045184326171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045184326171875,
"signal/advantage_abs_mean": 0.06837325692176818,
"signal/advantage_pre_scale_abs_mean": 0.06837325692176818,
"signal/advantage_pre_scale_std": 0.10427495390176773,
"signal/advantage_std": 0.10427495390176773,
"signal/brier_reward/centered_abs_mean": 0.11565729826688767,
"signal/brier_reward/group_bin_occupancy": 0.840625,
"signal/brier_reward/group_std_mean": 0.14894945323467254,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014457162283360959,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014457162283360959,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026661295210942625,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.714453125,
"signal/frontier_aurc_reward/group_std_mean": 0.004355709021911025,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7723716852488e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7723716852488e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16364216804504395,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_1/group_std_mean": 0.2090536832809448,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029291946906596423,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029291946906596423,
"signal/frontier_coverage_10/centered_abs_mean": 0.163558030128479,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_10/group_std_mean": 0.20894888639450074,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002927688602358103,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002927688602358103,
"signal/frontier_coverage_15/centered_abs_mean": 0.1632261723279953,
"signal/frontier_coverage_15/group_bin_occupancy": 0.863671875,
"signal/frontier_coverage_15/group_std_mean": 0.20853422582149506,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002921748394146562,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002921748394146562,
"signal/frontier_coverage_20/centered_abs_mean": 0.15006764531135558,
"signal/frontier_coverage_20/group_bin_occupancy": 0.860546875,
"signal/frontier_coverage_20/group_std_mean": 0.19213563203811646,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026862107682973147,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026862107682973147,
"signal/frontier_coverage_25/centered_abs_mean": 0.08844952881336213,
"signal/frontier_coverage_25/group_bin_occupancy": 0.880078125,
"signal/frontier_coverage_25/group_std_mean": 0.11421704292297363,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015832465374842285,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015832465374842285,
"signal/frontier_coverage_5/centered_abs_mean": 0.16364216804504395,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_5/group_std_mean": 0.2090536832809448,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029291946906596423,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029291946906596423,
"signal/frontier_ece_reward/centered_abs_mean": 0.007427510805428028,
"signal/frontier_ece_reward/group_bin_occupancy": 0.890234375,
"signal/frontier_ece_reward/group_std_mean": 0.009686007350683212,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009284388506785035,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009284388506785035,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2786764442920685,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726171875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3537044942378998,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03483455553650856,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03483455553650856,
"step": 185
},
{
"calibration/aurc": 0.23715687440798652,
"calibration/batch_distribution_entropy": 0.9721351094227396,
"calibration/batch_entropy_100bins": 0.9636682399300932,
"calibration/batch_entropy_10bins": 0.9721351094227396,
"calibration/batch_entropy_50bins": 0.9708812273088254,
"calibration/batch_uniqueness": 0.9519918907307791,
"calibration/buffer_distribution_entropy": 0.9983129150316932,
"calibration/buffer_entropy_100bins": 0.9986866998459007,
"calibration/buffer_entropy_10bins": 0.9983129150316932,
"calibration/buffer_entropy_50bins": 0.9986616817533553,
"calibration/confidence_entropy": 0.4812024814334549,
"calibration/coverage@0%": 0.04922257216242661,
"calibration/coverage@1%": 0.05976944716242662,
"calibration/coverage@10%": 0.22308815435420745,
"calibration/coverage@15%": 0.3262383806262231,
"calibration/coverage@20%": 0.47001360689823873,
"calibration/coverage@25%": 0.5852846746575342,
"calibration/coverage@30%": 0.7067835738747554,
"calibration/coverage@5%": 0.1296913221624266,
"calibration/ece": 0.10479527222992176,
"calibration/mean_confidence": 0.4769995498007412,
"calibration/prompt_uniqueness": 0.84160999691077,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 663.6,
"completions/max_terminated_length": 431.6,
"completions/mean_length": 178.928125,
"completions/mean_terminated_length": 178.79558715820312,
"completions/min_length": 87.0,
"completions/min_terminated_length": 87.0,
"epoch": 0.608,
"grad_norm": 0.0008596270345151424,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 640305482.0,
"reward": 0.8485802054405213,
"reward_std": 0.08481966853141784,
"rewards/accuracy_reward": 0.520703125,
"rewards/brier_reward": 0.8082961440086365,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002439832640811801,
"rewards/frontier_coverage_1": 0.14013661593198776,
"rewards/frontier_coverage_10": 0.14008433520793914,
"rewards/frontier_coverage_15": 0.1397281616926193,
"rewards/frontier_coverage_20": 0.12882789671421052,
"rewards/frontier_coverage_25": 0.08048931509256363,
"rewards/frontier_coverage_5": 0.14013533443212509,
"rewards/frontier_ece_reward": 0.005031970608979463,
"rewards/frontier_entropy_batch_reward": -0.216937056183815,
"signal/accuracy_reward/centered_abs_mean": 0.0880615234375,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.1184210166335106,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04403076171875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04403076171875,
"signal/advantage_abs_mean": 0.06550839766860009,
"signal/advantage_pre_scale_abs_mean": 0.06550839766860009,
"signal/advantage_pre_scale_std": 0.09949304610490799,
"signal/advantage_std": 0.09949304610490799,
"signal/brier_reward/centered_abs_mean": 0.11575733423233033,
"signal/brier_reward/group_bin_occupancy": 0.84140625,
"signal/brier_reward/group_std_mean": 0.1480691760778427,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01446966677904129,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01446966677904129,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002259706752374768,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.721484375,
"signal/frontier_aurc_reward/group_std_mean": 0.0036888211499899624,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.04487487685401e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.04487487685401e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16878078281879424,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_1/group_std_mean": 0.21479279398918152,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0030211757868528364,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030211757868528364,
"signal/frontier_coverage_10/centered_abs_mean": 0.16870121657848358,
"signal/frontier_coverage_10/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_10/group_std_mean": 0.21468909978866577,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030197515618056057,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030197515618056057,
"signal/frontier_coverage_15/centered_abs_mean": 0.16824153959751129,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_15/group_std_mean": 0.2140854448080063,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030115234199911355,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030115234199911355,
"signal/frontier_coverage_20/centered_abs_mean": 0.1515140563249588,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8625,
"signal/frontier_coverage_20/group_std_mean": 0.19256215989589692,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027121015824377536,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027121015824377536,
"signal/frontier_coverage_25/centered_abs_mean": 0.08549174815416336,
"signal/frontier_coverage_25/group_bin_occupancy": 0.89453125,
"signal/frontier_coverage_25/group_std_mean": 0.10916633754968644,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015303022461012005,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015303022461012005,
"signal/frontier_coverage_5/centered_abs_mean": 0.16878008842468262,
"signal/frontier_coverage_5/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_5/group_std_mean": 0.21479184925556183,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030211633536964657,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030211633536964657,
"signal/frontier_ece_reward/centered_abs_mean": 0.006747147906571627,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8875,
"signal/frontier_ece_reward/group_std_mean": 0.00879486370831728,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008433934883214534,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008433934883214534,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2829363703727722,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7359375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3577865481376648,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035367046296596524,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035367046296596524,
"step": 190
},
{
"calibration/aurc": 0.24532003825815796,
"calibration/batch_distribution_entropy": 0.981679767990388,
"calibration/batch_entropy_100bins": 0.9709472161360031,
"calibration/batch_entropy_10bins": 0.981679767990388,
"calibration/batch_entropy_50bins": 0.9798056822605836,
"calibration/batch_uniqueness": 0.95435791015625,
"calibration/buffer_distribution_entropy": 0.9985313752795584,
"calibration/buffer_entropy_100bins": 0.9988054011236678,
"calibration/buffer_entropy_10bins": 0.9985313752795584,
"calibration/buffer_entropy_50bins": 0.9987929314111103,
"calibration/confidence_entropy": 0.5201001362906903,
"calibration/coverage@0%": 0.03359375,
"calibration/coverage@1%": 0.03359375,
"calibration/coverage@10%": 0.201953125,
"calibration/coverage@15%": 0.341796875,
"calibration/coverage@20%": 0.436328125,
"calibration/coverage@25%": 0.52265625,
"calibration/coverage@30%": 0.613671875,
"calibration/coverage@5%": 0.112109375,
"calibration/ece": 0.10123137926063848,
"calibration/mean_confidence": 0.49137512856978677,
"calibration/prompt_uniqueness": 0.84775390625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 422.8,
"completions/max_terminated_length": 422.8,
"completions/mean_length": 180.31982421875,
"completions/mean_terminated_length": 180.31982421875,
"completions/min_length": 93.8,
"completions/min_terminated_length": 93.8,
"epoch": 0.624,
"grad_norm": 0.001026191283017397,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 657495861.0,
"reward": 0.8518246412277222,
"reward_std": 0.08756706416606903,
"rewards/accuracy_reward": 0.52275390625,
"rewards/brier_reward": 0.8068322658538818,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002647676505148411,
"rewards/frontier_coverage_1": 0.12973806113004685,
"rewards/frontier_coverage_10": 0.12968083024024962,
"rewards/frontier_coverage_15": 0.12917735427618027,
"rewards/frontier_coverage_20": 0.11514810025691986,
"rewards/frontier_coverage_25": 0.06962493434548378,
"rewards/frontier_coverage_5": 0.12973549515008925,
"rewards/frontier_ece_reward": 0.004124377947300672,
"rewards/frontier_entropy_batch_reward": -0.18768059611320495,
"signal/accuracy_reward/centered_abs_mean": 0.093621826171875,
"signal/accuracy_reward/group_bin_occupancy": 0.16796875,
"signal/accuracy_reward/group_std_mean": 0.12246521413326264,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0468109130859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0468109130859375,
"signal/advantage_abs_mean": 0.06893313750624656,
"signal/advantage_pre_scale_abs_mean": 0.06893313750624656,
"signal/advantage_pre_scale_std": 0.10512781888246536,
"signal/advantage_std": 0.10512781888246536,
"signal/brier_reward/centered_abs_mean": 0.10718954056501388,
"signal/brier_reward/group_bin_occupancy": 0.858984375,
"signal/brier_reward/group_std_mean": 0.1384373813867569,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013398692570626735,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013398692570626735,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0022487165872007607,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728515625,
"signal/frontier_aurc_reward/group_std_mean": 0.0035695353988558056,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.0252025792142375e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.0252025792142375e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15328652858734132,
"signal/frontier_coverage_1/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_1/group_std_mean": 0.19674740433692933,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002743828808888793,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002743828808888793,
"signal/frontier_coverage_10/centered_abs_mean": 0.1532078802585602,
"signal/frontier_coverage_10/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_10/group_std_mean": 0.19664531350135803,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002742420881986618,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002742420881986618,
"signal/frontier_coverage_15/centered_abs_mean": 0.1525299906730652,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_15/group_std_mean": 0.1957621306180954,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027302867732942105,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027302867732942105,
"signal/frontier_coverage_20/centered_abs_mean": 0.129715932905674,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88046875,
"signal/frontier_coverage_20/group_std_mean": 0.16696780920028687,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0023219150956720115,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023219150956720115,
"signal/frontier_coverage_25/centered_abs_mean": 0.06962908133864402,
"signal/frontier_coverage_25/group_bin_occupancy": 0.90078125,
"signal/frontier_coverage_25/group_std_mean": 0.09048426896333694,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001246360526420176,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001246360526420176,
"signal/frontier_coverage_5/centered_abs_mean": 0.15328298211097718,
"signal/frontier_coverage_5/group_bin_occupancy": 0.877734375,
"signal/frontier_coverage_5/group_std_mean": 0.19674279391765595,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002743765339255333,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002743765339255333,
"signal/frontier_ece_reward/centered_abs_mean": 0.00600477633997798,
"signal/frontier_ece_reward/group_bin_occupancy": 0.878515625,
"signal/frontier_ece_reward/group_std_mean": 0.007920240703970193,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007505970424972475,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007505970424972475,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2654553234577179,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73515625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3388149976730347,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033181915432214736,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033181915432214736,
"step": 195
},
{
"calibration/aurc": 0.24430388293357136,
"calibration/batch_distribution_entropy": 0.9791797972652356,
"calibration/batch_entropy_100bins": 0.9679413992157719,
"calibration/batch_entropy_10bins": 0.9791797972652356,
"calibration/batch_entropy_50bins": 0.975538565518886,
"calibration/batch_uniqueness": 0.9525665283203125,
"calibration/buffer_distribution_entropy": 0.9987113525336898,
"calibration/buffer_entropy_100bins": 0.998895678573958,
"calibration/buffer_entropy_10bins": 0.9987113525336898,
"calibration/buffer_entropy_50bins": 0.9989227369806599,
"calibration/confidence_entropy": 0.5174565045374463,
"calibration/coverage@0%": 0.07578125,
"calibration/coverage@1%": 0.111328125,
"calibration/coverage@10%": 0.309375,
"calibration/coverage@15%": 0.365234375,
"calibration/coverage@20%": 0.4078125,
"calibration/coverage@25%": 0.548828125,
"calibration/coverage@30%": 0.664453125,
"calibration/coverage@5%": 0.22578125,
"calibration/ece": 0.16490499070074144,
"calibration/mean_confidence": 0.5123002672000428,
"calibration/prompt_uniqueness": 0.852099609375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00078125,
"completions/max_length": 622.0,
"completions/max_terminated_length": 479.4,
"completions/mean_length": 184.83974609375,
"completions/mean_terminated_length": 183.78404846191407,
"completions/min_length": 97.4,
"completions/min_terminated_length": 97.4,
"epoch": 0.64,
"grad_norm": 0.0008454410126432776,
"learning_rate": 1e-06,
"loss": 0.0014,
"num_tokens": 674731308.0,
"reward": 0.8708350419998169,
"reward_std": 0.08193039745092393,
"rewards/accuracy_reward": 0.56953125,
"rewards/brier_reward": 0.8082751274108887,
"rewards/format_reward": 0.99921875,
"rewards/frontier_aurc_reward": -0.002218431932851672,
"rewards/frontier_coverage_1": 0.09933431595563888,
"rewards/frontier_coverage_10": 0.09924045875668526,
"rewards/frontier_coverage_15": 0.09875798150897026,
"rewards/frontier_coverage_20": 0.08629776164889336,
"rewards/frontier_coverage_25": 0.056573347002267835,
"rewards/frontier_coverage_5": 0.09933282062411308,
"rewards/frontier_ece_reward": 0.003817522618919611,
"rewards/frontier_entropy_batch_reward": -0.19735628366470337,
"signal/accuracy_reward/centered_abs_mean": 0.07532958984375,
"signal/accuracy_reward/group_bin_occupancy": 0.1640625,
"signal/accuracy_reward/group_std_mean": 0.10438980013132096,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.037664794921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037664794921875,
"signal/advantage_abs_mean": 0.06335262283682823,
"signal/advantage_pre_scale_abs_mean": 0.06335262283682823,
"signal/advantage_pre_scale_std": 0.09740418940782547,
"signal/advantage_std": 0.09740418940782547,
"signal/brier_reward/centered_abs_mean": 0.10196209698915482,
"signal/brier_reward/group_bin_occupancy": 0.84765625,
"signal/brier_reward/group_std_mean": 0.13130579739809037,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012745262123644352,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012745262123644352,
"signal/format_reward/centered_abs_mean": 0.001171875,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.001374816708266735,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0005859375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005859375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020356971537694333,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.718359375,
"signal/frontier_aurc_reward/group_std_mean": 0.003529385570436716,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.643897762231063e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.643897762231063e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.139412322640419,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_1/group_std_mean": 0.18100157380104065,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002495480561628938,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002495480561628938,
"signal/frontier_coverage_10/centered_abs_mean": 0.13926379680633544,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_10/group_std_mean": 0.18081148266792296,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002492821915075183,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002492821915075183,
"signal/frontier_coverage_15/centered_abs_mean": 0.13798445761203765,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_15/group_std_mean": 0.17917191088199616,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024699217174202204,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024699217174202204,
"signal/frontier_coverage_20/centered_abs_mean": 0.11161820888519287,
"signal/frontier_coverage_20/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_20/group_std_mean": 0.1451725423336029,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019979658536612988,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019979658536612988,
"signal/frontier_coverage_25/centered_abs_mean": 0.061739873886108396,
"signal/frontier_coverage_25/group_bin_occupancy": 0.89921875,
"signal/frontier_coverage_25/group_std_mean": 0.08028749227523804,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011051436886191368,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011051436886191368,
"signal/frontier_coverage_5/centered_abs_mean": 0.13940848410129547,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_5/group_std_mean": 0.1809966504573822,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002495411830022931,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002495411830022931,
"signal/frontier_ece_reward/centered_abs_mean": 0.005760820955038071,
"signal/frontier_ece_reward/group_bin_occupancy": 0.866796875,
"signal/frontier_ece_reward/group_std_mean": 0.00775869581848383,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007201026193797589,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007201026193797589,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2656015157699585,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34027169942855834,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03320018947124481,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03320018947124481,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.41854034890006203,
"eval_calibration/batch_distribution_entropy": 0.9258790022452184,
"eval_calibration/batch_entropy_100bins": 0.7155226945000874,
"eval_calibration/batch_entropy_10bins": 0.9258790022452184,
"eval_calibration/batch_entropy_50bins": 0.7976394251687033,
"eval_calibration/batch_uniqueness": 0.8994140625,
"eval_calibration/buffer_distribution_entropy": 0.9987820695130332,
"eval_calibration/buffer_entropy_100bins": 0.9989213084455153,
"eval_calibration/buffer_entropy_10bins": 0.9987820695130332,
"eval_calibration/buffer_entropy_50bins": 0.9989669617425297,
"eval_calibration/confidence_entropy": 0.49184587531394314,
"eval_calibration/coverage@0%": 0.0625,
"eval_calibration/coverage@1%": 0.0625,
"eval_calibration/coverage@10%": 0.1328125,
"eval_calibration/coverage@15%": 0.171875,
"eval_calibration/coverage@20%": 0.1953125,
"eval_calibration/coverage@25%": 0.2265625,
"eval_calibration/coverage@30%": 0.2578125,
"eval_calibration/coverage@5%": 0.0625,
"eval_calibration/ece": 0.17095216040466216,
"eval_calibration/mean_confidence": 0.45492840413455565,
"eval_calibration/prompt_uniqueness": 0.8994140625,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 295.25,
"eval_completions/max_terminated_length": 295.25,
"eval_completions/mean_length": 179.74158096313477,
"eval_completions/mean_terminated_length": 179.74158096313477,
"eval_completions/min_length": 109.75,
"eval_completions/min_terminated_length": 109.75,
"eval_loss": 0.0,
"eval_num_tokens": 674731308.0,
"eval_reward": 0.7049643099308014,
"eval_reward_std": 0.22184203192591667,
"eval_rewards/accuracy_reward": 0.423828125,
"eval_rewards/brier_reward": 0.7956392019987106,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0033663903595879674,
"eval_rewards/frontier_coverage_1": 0.19531626999378204,
"eval_rewards/frontier_coverage_10": 0.1952117159962654,
"eval_rewards/frontier_coverage_15": 0.19375700131058693,
"eval_rewards/frontier_coverage_20": 0.15371991135179996,
"eval_rewards/frontier_coverage_25": 0.08286740258336067,
"eval_rewards/frontier_coverage_5": 0.19531207531690598,
"eval_rewards/frontier_ece_reward": 0.003727212024386972,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 17.1514,
"eval_samples_per_second": 29.152,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4744873046875,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49458901584148407,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23724365234375,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23724365234375,
"eval_signal/advantage_abs_mean": 0.2057364284992218,
"eval_signal/advantage_pre_scale_abs_mean": 0.2057364284992218,
"eval_signal/advantage_pre_scale_std": 0.21934344619512558,
"eval_signal/advantage_std": 0.21934344619512558,
"eval_signal/brier_reward/centered_abs_mean": 0.1897713765501976,
"eval_signal/brier_reward/group_bin_occupancy": 0.8984375,
"eval_signal/brier_reward/group_std_mean": 0.24221712350845337,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0237214220687747,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.0237214220687747,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004152168636210263,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6640625,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.007893728208728135,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.432381426042411e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.432381426042411e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3731478080153465,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 1.0,
"eval_signal/frontier_coverage_1/group_std_mean": 0.45107389986515045,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006679345387965441,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006679345387965441,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.37288998067379,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 1.0,
"eval_signal/frontier_coverage_10/group_std_mean": 0.45077458769083023,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006674730451777577,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006674730451777577,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.36930492520332336,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 1.0,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4466145858168602,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0066105579026043415,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0066105579026043415,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.29113033413887024,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_20/group_std_mean": 0.354678250849247,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005211232579313219,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005211232579313219,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.13845044746994972,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9375,
"eval_signal/frontier_coverage_25/group_std_mean": 0.17535366117954254,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002478263049852103,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002478263049852103,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3731374442577362,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 1.0,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4510618671774864,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006679159821942449,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006679159821942449,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.007429954246617854,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.8671875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.010206094710156322,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0009287442808272317,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0009287442808272317,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.233,
"step": 200
},
{
"calibration/aurc": 0.4072680963308123,
"calibration/batch_distribution_entropy": 0.9719537361453303,
"calibration/batch_entropy_100bins": 0.9645036495344895,
"calibration/batch_entropy_10bins": 0.9719537361453303,
"calibration/batch_entropy_50bins": 0.972271453991881,
"calibration/batch_uniqueness": 0.9524566650390625,
"calibration/buffer_distribution_entropy": 0.9988068526161396,
"calibration/buffer_entropy_100bins": 0.9989106275315882,
"calibration/buffer_entropy_10bins": 0.9988068526161396,
"calibration/buffer_entropy_50bins": 0.9989733950683586,
"calibration/confidence_entropy": 0.5230128680468962,
"calibration/coverage@0%": 0.003515625,
"calibration/coverage@1%": 0.003515625,
"calibration/coverage@10%": 0.009765625,
"calibration/coverage@15%": 0.019140625,
"calibration/coverage@20%": 0.0875,
"calibration/coverage@25%": 0.137890625,
"calibration/coverage@30%": 0.26171875,
"calibration/coverage@5%": 0.003515625,
"calibration/ece": 0.1075798181494427,
"calibration/mean_confidence": 0.4706070682994466,
"calibration/prompt_uniqueness": 0.853369140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 614.6,
"completions/max_terminated_length": 390.6,
"completions/mean_length": 182.33017578125,
"completions/mean_terminated_length": 182.19835510253907,
"completions/min_length": 91.2,
"completions/min_terminated_length": 91.2,
"epoch": 0.656,
"grad_norm": 0.001009272993542254,
"learning_rate": 1e-06,
"loss": 0.0002,
"num_tokens": 691454913.0,
"reward": 0.8378146886825562,
"reward_std": 0.09044925570487976,
"rewards/accuracy_reward": 0.50419921875,
"rewards/brier_reward": 0.7853815197944641,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.003076691273599863,
"rewards/frontier_coverage_1": 0.1220921441912651,
"rewards/frontier_coverage_10": 0.1220327764749527,
"rewards/frontier_coverage_15": 0.12089861333370208,
"rewards/frontier_coverage_20": 0.09999236166477203,
"rewards/frontier_coverage_25": 0.05890063121914864,
"rewards/frontier_coverage_5": 0.12208605259656906,
"rewards/frontier_ece_reward": 0.003139182738959789,
"rewards/frontier_entropy_batch_reward": -0.19447652399539947,
"signal/accuracy_reward/centered_abs_mean": 0.098480224609375,
"signal/accuracy_reward/group_bin_occupancy": 0.171875,
"signal/accuracy_reward/group_std_mean": 0.12976800352334977,
"signal/accuracy_reward/group_zero_std_frac": 0.625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0492401123046875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0492401123046875,
"signal/advantage_abs_mean": 0.07178077697753907,
"signal/advantage_pre_scale_abs_mean": 0.07178077697753907,
"signal/advantage_pre_scale_std": 0.10841633677482605,
"signal/advantage_std": 0.10841633677482605,
"signal/brier_reward/centered_abs_mean": 0.11750788986682892,
"signal/brier_reward/group_bin_occupancy": 0.852734375,
"signal/brier_reward/group_std_mean": 0.15079601109027863,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014688486233353615,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014688486233353615,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0026406456716358663,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.719921875,
"signal/frontier_aurc_reward/group_std_mean": 0.004412023955956102,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7267557238228616e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7267557238228616e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16164307296276093,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_1/group_std_mean": 0.2075218141078949,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0028934108559042215,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028934108559042215,
"signal/frontier_coverage_10/centered_abs_mean": 0.16153694093227386,
"signal/frontier_coverage_10/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_10/group_std_mean": 0.207388174533844,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002891511144116521,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002891511144116521,
"signal/frontier_coverage_15/centered_abs_mean": 0.15967210829257966,
"signal/frontier_coverage_15/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_15/group_std_mean": 0.20502502024173735,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028581305872648955,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028581305872648955,
"signal/frontier_coverage_20/centered_abs_mean": 0.12663253098726274,
"signal/frontier_coverage_20/group_bin_occupancy": 0.865234375,
"signal/frontier_coverage_20/group_std_mean": 0.16340535879135132,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002266722172498703,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002266722172498703,
"signal/frontier_coverage_25/centered_abs_mean": 0.0673256479203701,
"signal/frontier_coverage_25/group_bin_occupancy": 0.896875,
"signal/frontier_coverage_25/group_std_mean": 0.08757460862398148,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012051290133967996,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012051290133967996,
"signal/frontier_coverage_5/centered_abs_mean": 0.1616332322359085,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86875,
"signal/frontier_coverage_5/group_std_mean": 0.20750951170921325,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0028932347893714907,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028932347893714907,
"signal/frontier_ece_reward/centered_abs_mean": 0.0055423608049750325,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8828125,
"signal/frontier_ece_reward/group_std_mean": 0.007337391003966331,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006927951006218791,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006927951006218791,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.266719377040863,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.740625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3390504062175751,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03333992213010788,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333992213010788,
"step": 205
},
{
"calibration/aurc": 0.30184251919884036,
"calibration/batch_distribution_entropy": 0.9766400147465495,
"calibration/batch_entropy_100bins": 0.9661916960682403,
"calibration/batch_entropy_10bins": 0.9766400147465495,
"calibration/batch_entropy_50bins": 0.975390516981222,
"calibration/batch_uniqueness": 0.953192138671875,
"calibration/buffer_distribution_entropy": 0.9989337853009663,
"calibration/buffer_entropy_100bins": 0.9989487435236883,
"calibration/buffer_entropy_10bins": 0.9989337853009663,
"calibration/buffer_entropy_50bins": 0.9990317545857466,
"calibration/confidence_entropy": 0.4864500476998418,
"calibration/coverage@0%": 0.019140625,
"calibration/coverage@1%": 0.019140625,
"calibration/coverage@10%": 0.166796875,
"calibration/coverage@15%": 0.203515625,
"calibration/coverage@20%": 0.25,
"calibration/coverage@25%": 0.29921875,
"calibration/coverage@30%": 0.41796875,
"calibration/coverage@5%": 0.112890625,
"calibration/ece": 0.14628865248087672,
"calibration/mean_confidence": 0.4938824967603755,
"calibration/prompt_uniqueness": 0.848876953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 387.6,
"completions/max_terminated_length": 387.6,
"completions/mean_length": 181.459765625,
"completions/mean_terminated_length": 181.459765625,
"completions/min_length": 90.0,
"completions/min_terminated_length": 90.0,
"epoch": 0.672,
"grad_norm": 0.0008657427970319986,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 708226501.0,
"reward": 0.8453529119491577,
"reward_std": 0.08392495959997177,
"rewards/accuracy_reward": 0.51845703125,
"rewards/brier_reward": 0.7956361413002014,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002716613910160959,
"rewards/frontier_coverage_1": 0.1360724911093712,
"rewards/frontier_coverage_10": 0.13601431995630264,
"rewards/frontier_coverage_15": 0.13472481966018676,
"rewards/frontier_coverage_20": 0.11215179413557053,
"rewards/frontier_coverage_25": 0.0678616002202034,
"rewards/frontier_coverage_5": 0.13606539219617844,
"rewards/frontier_ece_reward": 0.0036563334055244924,
"rewards/frontier_entropy_batch_reward": -0.2134263336658478,
"signal/accuracy_reward/centered_abs_mean": 0.093804931640625,
"signal/accuracy_reward/group_bin_occupancy": 0.17109375,
"signal/accuracy_reward/group_std_mean": 0.12641526907682418,
"signal/accuracy_reward/group_zero_std_frac": 0.63125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0469024658203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0469024658203125,
"signal/advantage_abs_mean": 0.06465236023068428,
"signal/advantage_pre_scale_abs_mean": 0.06465236023068428,
"signal/advantage_pre_scale_std": 0.0998497799038887,
"signal/advantage_std": 0.0998497799038887,
"signal/brier_reward/centered_abs_mean": 0.11606302261352539,
"signal/brier_reward/group_bin_occupancy": 0.844140625,
"signal/brier_reward/group_std_mean": 0.14761213660240174,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014507877826690673,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014507877826690673,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023735316237434743,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.742578125,
"signal/frontier_aurc_reward/group_std_mean": 0.003954212227836251,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.2486215534154326e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.2486215534154326e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1699829190969467,
"signal/frontier_coverage_1/group_bin_occupancy": 0.851171875,
"signal/frontier_coverage_1/group_std_mean": 0.21657621562480928,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003042694181203842,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003042694181203842,
"signal/frontier_coverage_10/centered_abs_mean": 0.1698471039533615,
"signal/frontier_coverage_10/group_bin_occupancy": 0.851171875,
"signal/frontier_coverage_10/group_std_mean": 0.21641322374343872,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003040262870490551,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003040262870490551,
"signal/frontier_coverage_15/centered_abs_mean": 0.16724947690963746,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
"signal/frontier_coverage_15/group_std_mean": 0.213279390335083,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029937655199319124,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029937655199319124,
"signal/frontier_coverage_20/centered_abs_mean": 0.12853406816720964,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8453125,
"signal/frontier_coverage_20/group_std_mean": 0.1650033712387085,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002300759730860591,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002300759730860591,
"signal/frontier_coverage_25/centered_abs_mean": 0.07163915932178497,
"signal/frontier_coverage_25/group_bin_occupancy": 0.901953125,
"signal/frontier_coverage_25/group_std_mean": 0.09214308261871337,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001282340893521905,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001282340893521905,
"signal/frontier_coverage_5/centered_abs_mean": 0.16996634304523467,
"signal/frontier_coverage_5/group_bin_occupancy": 0.851171875,
"signal/frontier_coverage_5/group_std_mean": 0.2165563225746155,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003042397554963827,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003042397554963827,
"signal/frontier_ece_reward/centered_abs_mean": 0.0058284570463001725,
"signal/frontier_ece_reward/group_bin_occupancy": 0.867578125,
"signal/frontier_ece_reward/group_std_mean": 0.007644351571798325,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007285571307875216,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007285571307875216,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2624157965183258,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730078125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3384029269218445,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032801974564790726,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032801974564790726,
"step": 210
},
{
"calibration/aurc": 0.32277610924710637,
"calibration/batch_distribution_entropy": 0.9701296713733163,
"calibration/batch_entropy_100bins": 0.9644057869188065,
"calibration/batch_entropy_10bins": 0.9701296713733163,
"calibration/batch_entropy_50bins": 0.9714994429885904,
"calibration/batch_uniqueness": 0.951641845703125,
"calibration/buffer_distribution_entropy": 0.9989417647738268,
"calibration/buffer_entropy_100bins": 0.9989298633022411,
"calibration/buffer_entropy_10bins": 0.9989417647738268,
"calibration/buffer_entropy_50bins": 0.9990118814717297,
"calibration/confidence_entropy": 0.513002509873567,
"calibration/coverage@0%": 0.01171875,
"calibration/coverage@1%": 0.01171875,
"calibration/coverage@10%": 0.151953125,
"calibration/coverage@15%": 0.240234375,
"calibration/coverage@20%": 0.323046875,
"calibration/coverage@25%": 0.4640625,
"calibration/coverage@30%": 0.544921875,
"calibration/coverage@5%": 0.037890625,
"calibration/ece": 0.12407527470589289,
"calibration/mean_confidence": 0.4836047739947819,
"calibration/prompt_uniqueness": 0.8462890625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 860.4,
"completions/max_terminated_length": 406.2,
"completions/mean_length": 185.49267578125,
"completions/mean_terminated_length": 185.22888793945313,
"completions/min_length": 92.2,
"completions/min_terminated_length": 92.2,
"epoch": 0.688,
"grad_norm": 0.0011569778434932232,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 725079866.0,
"reward": 0.8533730626106262,
"reward_std": 0.09096147418022156,
"rewards/accuracy_reward": 0.5375,
"rewards/brier_reward": 0.7997807264328003,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0025234234519302843,
"rewards/frontier_coverage_1": 0.1166018046438694,
"rewards/frontier_coverage_10": 0.11651684194803238,
"rewards/frontier_coverage_15": 0.11543264091014863,
"rewards/frontier_coverage_20": 0.08750456124544144,
"rewards/frontier_coverage_25": 0.05482863634824753,
"rewards/frontier_coverage_5": 0.11660146117210388,
"rewards/frontier_ece_reward": 0.002992427349090576,
"rewards/frontier_entropy_batch_reward": -0.21163803935050965,
"signal/accuracy_reward/centered_abs_mean": 0.0998291015625,
"signal/accuracy_reward/group_bin_occupancy": 0.1765625,
"signal/accuracy_reward/group_std_mean": 0.13668281584978104,
"signal/accuracy_reward/group_zero_std_frac": 0.5875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04991455078125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04991455078125,
"signal/advantage_abs_mean": 0.06996657401323318,
"signal/advantage_pre_scale_abs_mean": 0.06996657401323318,
"signal/advantage_pre_scale_std": 0.10759487152099609,
"signal/advantage_std": 0.10759487152099609,
"signal/brier_reward/centered_abs_mean": 0.10977463126182556,
"signal/brier_reward/group_bin_occupancy": 0.861328125,
"signal/brier_reward/group_std_mean": 0.14089445173740386,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013721828907728195,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013721828907728195,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020935308886691926,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.746484375,
"signal/frontier_aurc_reward/group_std_mean": 0.0034752024803310633,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.747420341824181e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.747420341824181e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.16087363958358764,
"signal/frontier_coverage_1/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_1/group_std_mean": 0.20536437928676604,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002879638038575649,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002879638038575649,
"signal/frontier_coverage_10/centered_abs_mean": 0.16079167425632476,
"signal/frontier_coverage_10/group_bin_occupancy": 0.866015625,
"signal/frontier_coverage_10/group_std_mean": 0.20525703132152556,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002878170693293214,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002878170693293214,
"signal/frontier_coverage_15/centered_abs_mean": 0.15650815665721893,
"signal/frontier_coverage_15/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_15/group_std_mean": 0.19971639513969422,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028014959301799537,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028014959301799537,
"signal/frontier_coverage_20/centered_abs_mean": 0.1123495414853096,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8609375,
"signal/frontier_coverage_20/group_std_mean": 0.14411205649375916,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020110567333176733,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020110567333176733,
"signal/frontier_coverage_25/centered_abs_mean": 0.06338529288768768,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9078125,
"signal/frontier_coverage_25/group_std_mean": 0.0813615933060646,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0011345966951921583,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011345966951921583,
"signal/frontier_coverage_5/centered_abs_mean": 0.16086728274822235,
"signal/frontier_coverage_5/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_5/group_std_mean": 0.20535596311092377,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002879524324089289,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002879524324089289,
"signal/frontier_ece_reward/centered_abs_mean": 0.0054166271351277825,
"signal/frontier_ece_reward/group_bin_occupancy": 0.882421875,
"signal/frontier_ece_reward/group_std_mean": 0.00712386667728424,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006770783918909728,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006770783918909728,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2808054625988007,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35399608612060546,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035100682824850085,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035100682824850085,
"step": 215
},
{
"calibration/aurc": 0.2567679202323507,
"calibration/batch_distribution_entropy": 0.9753718666465045,
"calibration/batch_entropy_100bins": 0.9653751735288918,
"calibration/batch_entropy_10bins": 0.9753718666465045,
"calibration/batch_entropy_50bins": 0.9753353539912812,
"calibration/batch_uniqueness": 0.9522705078125,
"calibration/buffer_distribution_entropy": 0.9989540322458701,
"calibration/buffer_entropy_100bins": 0.9989665872140143,
"calibration/buffer_entropy_10bins": 0.9989540322458701,
"calibration/buffer_entropy_50bins": 0.9990419447527051,
"calibration/confidence_entropy": 0.4933685141452767,
"calibration/coverage@0%": 0.008203125,
"calibration/coverage@1%": 0.008203125,
"calibration/coverage@10%": 0.11015625,
"calibration/coverage@15%": 0.191015625,
"calibration/coverage@20%": 0.31171875,
"calibration/coverage@25%": 0.5234375,
"calibration/coverage@30%": 0.6765625,
"calibration/coverage@5%": 0.024609375,
"calibration/ece": 0.10514110855139244,
"calibration/mean_confidence": 0.5225538825879033,
"calibration/prompt_uniqueness": 0.837548828125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 687.4,
"completions/max_terminated_length": 469.4,
"completions/mean_length": 189.19560546875,
"completions/mean_terminated_length": 189.06415710449218,
"completions/min_length": 97.0,
"completions/min_terminated_length": 97.0,
"epoch": 0.704,
"grad_norm": 0.00079206726513803,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 741883373.0,
"reward": 0.8564110398292542,
"reward_std": 0.08603468835353852,
"rewards/accuracy_reward": 0.5404296875,
"rewards/brier_reward": 0.8099352717399597,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002810010826215148,
"rewards/frontier_coverage_1": 0.11663352549076081,
"rewards/frontier_coverage_10": 0.11659030914306641,
"rewards/frontier_coverage_15": 0.1124075010418892,
"rewards/frontier_coverage_20": 0.08771874606609345,
"rewards/frontier_coverage_25": 0.0565977543592453,
"rewards/frontier_coverage_5": 0.11662895604968071,
"rewards/frontier_ece_reward": 0.0033508235588669776,
"rewards/frontier_entropy_batch_reward": -0.20978534519672393,
"signal/accuracy_reward/centered_abs_mean": 0.082080078125,
"signal/accuracy_reward/group_bin_occupancy": 0.16484375,
"signal/accuracy_reward/group_std_mean": 0.10941672474145889,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0410400390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0410400390625,
"signal/advantage_abs_mean": 0.06718932390213013,
"signal/advantage_pre_scale_abs_mean": 0.06718932390213013,
"signal/advantage_pre_scale_std": 0.10292920172214508,
"signal/advantage_std": 0.10292920172214508,
"signal/brier_reward/centered_abs_mean": 0.10521638691425324,
"signal/brier_reward/group_bin_occupancy": 0.8578125,
"signal/brier_reward/group_std_mean": 0.13554594218730925,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013152048364281655,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013152048364281655,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024594481103122233,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74765625,
"signal/frontier_aurc_reward/group_std_mean": 0.004012216068804264,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.402412014314905e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.402412014314905e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14190192222595216,
"signal/frontier_coverage_1/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_1/group_std_mean": 0.18161689043045043,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025400443468242885,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025400443468242885,
"signal/frontier_coverage_10/centered_abs_mean": 0.14177892506122589,
"signal/frontier_coverage_10/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_10/group_std_mean": 0.18146247267723084,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002537842746824026,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002537842746824026,
"signal/frontier_coverage_15/centered_abs_mean": 0.1324725031852722,
"signal/frontier_coverage_15/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_15/group_std_mean": 0.16975606381893157,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023712576366961002,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023712576366961002,
"signal/frontier_coverage_20/centered_abs_mean": 0.09445251375436783,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_20/group_std_mean": 0.12184825539588928,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001690700021572411,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001690700021572411,
"signal/frontier_coverage_25/centered_abs_mean": 0.05518615916371346,
"signal/frontier_coverage_25/group_bin_occupancy": 0.925,
"signal/frontier_coverage_25/group_std_mean": 0.07120932638645172,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009878322365693749,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009878322365693749,
"signal/frontier_coverage_5/centered_abs_mean": 0.1418927103281021,
"signal/frontier_coverage_5/group_bin_occupancy": 0.874609375,
"signal/frontier_coverage_5/group_std_mean": 0.18160516917705535,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025398793630301954,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025398793630301954,
"signal/frontier_ece_reward/centered_abs_mean": 0.005274960119277239,
"signal/frontier_ece_reward/group_bin_occupancy": 0.878515625,
"signal/frontier_ece_reward/group_std_mean": 0.006980370450764895,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006593700149096548,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006593700149096548,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28154911994934084,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.733203125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3582507610321045,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.035193639993667605,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035193639993667605,
"step": 220
},
{
"calibration/aurc": 0.23126784031942532,
"calibration/batch_distribution_entropy": 0.9914821225669396,
"calibration/batch_entropy_100bins": 0.9745556431012833,
"calibration/batch_entropy_10bins": 0.9914821225669396,
"calibration/batch_entropy_50bins": 0.9834161123379925,
"calibration/batch_uniqueness": 0.9553955078125,
"calibration/buffer_distribution_entropy": 0.9990940525630616,
"calibration/buffer_entropy_100bins": 0.9990462171221427,
"calibration/buffer_entropy_10bins": 0.9990940525630616,
"calibration/buffer_entropy_50bins": 0.9991333015296009,
"calibration/confidence_entropy": 0.4974279364345352,
"calibration/coverage@0%": 0.08125,
"calibration/coverage@1%": 0.1296875,
"calibration/coverage@10%": 0.297265625,
"calibration/coverage@15%": 0.3734375,
"calibration/coverage@20%": 0.453125,
"calibration/coverage@25%": 0.551171875,
"calibration/coverage@30%": 0.6421875,
"calibration/coverage@5%": 0.225390625,
"calibration/ece": 0.13818892162141455,
"calibration/mean_confidence": 0.528279888360925,
"calibration/prompt_uniqueness": 0.8396484375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 392.4,
"completions/max_terminated_length": 392.4,
"completions/mean_length": 193.5751953125,
"completions/mean_terminated_length": 193.5751953125,
"completions/min_length": 102.0,
"completions/min_terminated_length": 102.0,
"epoch": 0.72,
"grad_norm": 0.0008994463132694364,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 758875439.0,
"reward": 0.867469334602356,
"reward_std": 0.08709415346384049,
"rewards/accuracy_reward": 0.563671875,
"rewards/brier_reward": 0.8105340003967285,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002341646375134587,
"rewards/frontier_coverage_1": 0.10286648273468017,
"rewards/frontier_coverage_10": 0.10283031612634659,
"rewards/frontier_coverage_15": 0.09793300032615662,
"rewards/frontier_coverage_20": 0.07525258213281631,
"rewards/frontier_coverage_25": 0.05333108454942703,
"rewards/frontier_coverage_5": 0.10286374539136886,
"rewards/frontier_ece_reward": 0.003090843977406621,
"rewards/frontier_entropy_batch_reward": -0.20484532713890075,
"signal/accuracy_reward/centered_abs_mean": 0.08983154296875,
"signal/accuracy_reward/group_bin_occupancy": 0.1703125,
"signal/accuracy_reward/group_std_mean": 0.12185031622648239,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044915771484375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.044915771484375,
"signal/advantage_abs_mean": 0.06714712977409362,
"signal/advantage_pre_scale_abs_mean": 0.06714712977409362,
"signal/advantage_pre_scale_std": 0.10354482531547546,
"signal/advantage_std": 0.10354482531547546,
"signal/brier_reward/centered_abs_mean": 0.10361835062503814,
"signal/brier_reward/group_bin_occupancy": 0.8609375,
"signal/brier_reward/group_std_mean": 0.1339139461517334,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012952293828129768,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012952293828129768,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002101215533912182,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74453125,
"signal/frontier_aurc_reward/group_std_mean": 0.0033895236440002917,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.7611756488331595e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.7611756488331595e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14568218886852263,
"signal/frontier_coverage_1/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_1/group_std_mean": 0.18670837283134462,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002607711125165224,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002607711125165224,
"signal/frontier_coverage_10/centered_abs_mean": 0.14543514251708983,
"signal/frontier_coverage_10/group_bin_occupancy": 0.873828125,
"signal/frontier_coverage_10/group_std_mean": 0.1864002525806427,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026032889261841776,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026032889261841776,
"signal/frontier_coverage_15/centered_abs_mean": 0.1325514554977417,
"signal/frontier_coverage_15/group_bin_occupancy": 0.870703125,
"signal/frontier_coverage_15/group_std_mean": 0.17030819058418273,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023726709187030792,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023726709187030792,
"signal/frontier_coverage_20/centered_abs_mean": 0.0897199884057045,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8765625,
"signal/frontier_coverage_20/group_std_mean": 0.11619268357753754,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016059877583757044,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016059877583757044,
"signal/frontier_coverage_25/centered_abs_mean": 0.054404760152101515,
"signal/frontier_coverage_25/group_bin_occupancy": 0.928515625,
"signal/frontier_coverage_25/group_std_mean": 0.06992583870887756,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009738451801240445,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009738451801240445,
"signal/frontier_coverage_5/centered_abs_mean": 0.14567132890224457,
"signal/frontier_coverage_5/group_bin_occupancy": 0.873046875,
"signal/frontier_coverage_5/group_std_mean": 0.18669503033161164,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026075166650116445,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026075166650116445,
"signal/frontier_ece_reward/centered_abs_mean": 0.0052463172003626825,
"signal/frontier_ece_reward/group_bin_occupancy": 0.901953125,
"signal/frontier_ece_reward/group_std_mean": 0.006824824120849371,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006557896500453353,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006557896500453353,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2725887656211853,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.736328125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3432928442955017,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03407359570264816,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03407359570264816,
"step": 225
},
{
"calibration/aurc": 0.251207175508105,
"calibration/batch_distribution_entropy": 0.9803575481156237,
"calibration/batch_entropy_100bins": 0.96926214899048,
"calibration/batch_entropy_10bins": 0.9803575481156237,
"calibration/batch_entropy_50bins": 0.9778726337355224,
"calibration/batch_uniqueness": 0.9536702349762987,
"calibration/buffer_distribution_entropy": 0.9991889846010944,
"calibration/buffer_entropy_100bins": 0.9990874643844343,
"calibration/buffer_entropy_10bins": 0.9991889846010944,
"calibration/buffer_entropy_50bins": 0.9991898926831168,
"calibration/confidence_entropy": 0.48241655738057865,
"calibration/coverage@0%": 0.022267153864970645,
"calibration/coverage@1%": 0.022267153864970645,
"calibration/coverage@10%": 0.15632491438356164,
"calibration/coverage@15%": 0.21420162671232879,
"calibration/coverage@20%": 0.4169658145792564,
"calibration/coverage@25%": 0.5560673006360078,
"calibration/coverage@30%": 0.6850255320450097,
"calibration/coverage@5%": 0.1152718321917808,
"calibration/ece": 0.13081064207628812,
"calibration/mean_confidence": 0.533058629028637,
"calibration/prompt_uniqueness": 0.8375652904689126,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 881.6,
"completions/max_terminated_length": 427.8,
"completions/mean_length": 198.65859375,
"completions/mean_terminated_length": 198.26717834472657,
"completions/min_length": 100.8,
"completions/min_terminated_length": 100.8,
"epoch": 0.736,
"grad_norm": 0.0006931371171958745,
"learning_rate": 1e-06,
"loss": 0.0005,
"num_tokens": 775849287.0,
"reward": 0.8676259756088257,
"reward_std": 0.08426170200109481,
"rewards/accuracy_reward": 0.5630859375,
"rewards/brier_reward": 0.8032041311264038,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.0026727572083473207,
"rewards/frontier_coverage_1": 0.10416304171085358,
"rewards/frontier_coverage_10": 0.10409975200891494,
"rewards/frontier_coverage_15": 0.09961197376251221,
"rewards/frontier_coverage_20": 0.07543607577681541,
"rewards/frontier_coverage_25": 0.0561057448387146,
"rewards/frontier_coverage_5": 0.10416053682565689,
"rewards/frontier_ece_reward": 0.002644325466826558,
"rewards/frontier_entropy_batch_reward": -0.19347001612186432,
"signal/accuracy_reward/centered_abs_mean": 0.0850830078125,
"signal/accuracy_reward/group_bin_occupancy": 0.1671875,
"signal/accuracy_reward/group_std_mean": 0.114646577835083,
"signal/accuracy_reward/group_zero_std_frac": 0.6625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04254150390625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04254150390625,
"signal/advantage_abs_mean": 0.06527714878320694,
"signal/advantage_pre_scale_abs_mean": 0.06527714878320694,
"signal/advantage_pre_scale_std": 0.10004038214683533,
"signal/advantage_std": 0.10004038214683533,
"signal/brier_reward/centered_abs_mean": 0.10927082747220992,
"signal/brier_reward/group_bin_occupancy": 0.831640625,
"signal/brier_reward/group_std_mean": 0.14201997220516205,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01365885343402624,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01365885343402624,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023929367307573557,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73125,
"signal/frontier_aurc_reward/group_std_mean": 0.0037743649911135433,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.283356502128299e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.283356502128299e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14819615185260773,
"signal/frontier_coverage_1/group_bin_occupancy": 0.84921875,
"signal/frontier_coverage_1/group_std_mean": 0.19186924695968627,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026527110021561384,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026527110021561384,
"signal/frontier_coverage_10/centered_abs_mean": 0.147468763589859,
"signal/frontier_coverage_10/group_bin_occupancy": 0.850390625,
"signal/frontier_coverage_10/group_std_mean": 0.1909423440694809,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026396906469017267,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026396906469017267,
"signal/frontier_coverage_15/centered_abs_mean": 0.1348020002245903,
"signal/frontier_coverage_15/group_bin_occupancy": 0.840234375,
"signal/frontier_coverage_15/group_std_mean": 0.17472992837429047,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024129556957632305,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024129556957632305,
"signal/frontier_coverage_20/centered_abs_mean": 0.08800848871469498,
"signal/frontier_coverage_20/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_20/group_std_mean": 0.11463980823755264,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015753519488498568,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015753519488498568,
"signal/frontier_coverage_25/centered_abs_mean": 0.055414053797721866,
"signal/frontier_coverage_25/group_bin_occupancy": 0.925390625,
"signal/frontier_coverage_25/group_std_mean": 0.07170938104391097,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000991911522578448,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000991911522578448,
"signal/frontier_coverage_5/centered_abs_mean": 0.14818698167800903,
"signal/frontier_coverage_5/group_bin_occupancy": 0.84921875,
"signal/frontier_coverage_5/group_std_mean": 0.19185736775398254,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026525467168539763,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026525467168539763,
"signal/frontier_ece_reward/centered_abs_mean": 0.005124002322554588,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89609375,
"signal/frontier_ece_reward/group_std_mean": 0.006701454985886812,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006405002903193235,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006405002903193235,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26818968653678893,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.734765625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34574413299560547,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033523710817098616,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033523710817098616,
"step": 230
},
{
"calibration/aurc": 0.2622214479199293,
"calibration/batch_distribution_entropy": 0.9764551048609474,
"calibration/batch_entropy_100bins": 0.9666018818532802,
"calibration/batch_entropy_10bins": 0.9764551048609474,
"calibration/batch_entropy_50bins": 0.9763242677062596,
"calibration/batch_uniqueness": 0.9524139404296875,
"calibration/buffer_distribution_entropy": 0.9991939792089879,
"calibration/buffer_entropy_100bins": 0.9990739400539542,
"calibration/buffer_entropy_10bins": 0.9991939792089879,
"calibration/buffer_entropy_50bins": 0.9991816292648175,
"calibration/confidence_entropy": 0.4706510873405473,
"calibration/coverage@0%": 0.010546875,
"calibration/coverage@1%": 0.010546875,
"calibration/coverage@10%": 0.100390625,
"calibration/coverage@15%": 0.246484375,
"calibration/coverage@20%": 0.447265625,
"calibration/coverage@25%": 0.584765625,
"calibration/coverage@30%": 0.669140625,
"calibration/coverage@5%": 0.03515625,
"calibration/ece": 0.11216717108532222,
"calibration/mean_confidence": 0.48983524720107646,
"calibration/prompt_uniqueness": 0.8369140625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 411.8,
"completions/max_terminated_length": 411.8,
"completions/mean_length": 198.97177734375,
"completions/mean_terminated_length": 198.97177734375,
"completions/min_length": 105.6,
"completions/min_terminated_length": 105.6,
"epoch": 0.752,
"grad_norm": 0.0007598252850584686,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 793113958.0,
"reward": 0.8673793315887451,
"reward_std": 0.0860441878437996,
"rewards/accuracy_reward": 0.56337890625,
"rewards/brier_reward": 0.8044471979141236,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002977763069793582,
"rewards/frontier_coverage_1": 0.10541683062911034,
"rewards/frontier_coverage_10": 0.10495719313621521,
"rewards/frontier_coverage_15": 0.09651436656713486,
"rewards/frontier_coverage_20": 0.07021676413714886,
"rewards/frontier_coverage_25": 0.050591808184981345,
"rewards/frontier_coverage_5": 0.10540874376893043,
"rewards/frontier_ece_reward": 0.0025658421916887166,
"rewards/frontier_entropy_batch_reward": -0.1974082589149475,
"signal/accuracy_reward/centered_abs_mean": 0.083087158203125,
"signal/accuracy_reward/group_bin_occupancy": 0.168359375,
"signal/accuracy_reward/group_std_mean": 0.11437420845031739,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0415435791015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0415435791015625,
"signal/advantage_abs_mean": 0.06670793667435646,
"signal/advantage_pre_scale_abs_mean": 0.06670793667435646,
"signal/advantage_pre_scale_std": 0.10405694842338561,
"signal/advantage_std": 0.10405694842338561,
"signal/brier_reward/centered_abs_mean": 0.1071990892291069,
"signal/brier_reward/group_bin_occupancy": 0.837890625,
"signal/brier_reward/group_std_mean": 0.13994504809379577,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013399886153638362,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013399886153638362,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00265240459702909,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.725390625,
"signal/frontier_aurc_reward/group_std_mean": 0.004353985376656056,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.7478038322879004e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.7478038322879004e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14081787765026094,
"signal/frontier_coverage_1/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_1/group_std_mean": 0.18302632570266725,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025206399615854023,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025206399615854023,
"signal/frontier_coverage_10/centered_abs_mean": 0.1402135133743286,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86953125,
"signal/frontier_coverage_10/group_std_mean": 0.1822360187768936,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002509821904823184,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002509821904823184,
"signal/frontier_coverage_15/centered_abs_mean": 0.12741587162017823,
"signal/frontier_coverage_15/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_15/group_std_mean": 0.16580144464969634,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022807438392192124,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022807438392192124,
"signal/frontier_coverage_20/centered_abs_mean": 0.08223778158426284,
"signal/frontier_coverage_20/group_bin_occupancy": 0.875,
"signal/frontier_coverage_20/group_std_mean": 0.10763536989688874,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014720562612637877,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014720562612637877,
"signal/frontier_coverage_25/centered_abs_mean": 0.053284359723329545,
"signal/frontier_coverage_25/group_bin_occupancy": 0.91953125,
"signal/frontier_coverage_25/group_std_mean": 0.06900968700647354,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009537900099530816,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009537900099530816,
"signal/frontier_coverage_5/centered_abs_mean": 0.1408083975315094,
"signal/frontier_coverage_5/group_bin_occupancy": 0.869140625,
"signal/frontier_coverage_5/group_std_mean": 0.18301377892494203,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025204701349139215,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025204701349139215,
"signal/frontier_ece_reward/centered_abs_mean": 0.005157566629350185,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89921875,
"signal/frontier_ece_reward/group_std_mean": 0.006740899570286274,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006446958286687732,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006446958286687732,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2672864556312561,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3404460310935974,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03341080695390701,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03341080695390701,
"step": 235
},
{
"calibration/aurc": 0.2681903946291124,
"calibration/batch_distribution_entropy": 0.9767026425312034,
"calibration/batch_entropy_100bins": 0.9688933764667013,
"calibration/batch_entropy_10bins": 0.9767026425312034,
"calibration/batch_entropy_50bins": 0.9750379284339221,
"calibration/batch_uniqueness": 0.9525848388671875,
"calibration/buffer_distribution_entropy": 0.999199839210462,
"calibration/buffer_entropy_100bins": 0.9990462741781064,
"calibration/buffer_entropy_10bins": 0.999199839210462,
"calibration/buffer_entropy_50bins": 0.9991680034614012,
"calibration/confidence_entropy": 0.5025959777866486,
"calibration/coverage@0%": 0.042578125,
"calibration/coverage@1%": 0.061328125,
"calibration/coverage@10%": 0.226171875,
"calibration/coverage@15%": 0.2875,
"calibration/coverage@20%": 0.3734375,
"calibration/coverage@25%": 0.491015625,
"calibration/coverage@30%": 0.584765625,
"calibration/coverage@5%": 0.177734375,
"calibration/ece": 0.14313831918485717,
"calibration/mean_confidence": 0.4804080079299814,
"calibration/prompt_uniqueness": 0.8513671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 403.4,
"completions/max_terminated_length": 403.4,
"completions/mean_length": 200.05576171875,
"completions/mean_terminated_length": 200.05576171875,
"completions/min_length": 102.2,
"completions/min_terminated_length": 102.2,
"epoch": 0.768,
"grad_norm": 0.0011492387857288122,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 810095233.0,
"reward": 0.8437476992607117,
"reward_std": 0.08492105603218078,
"rewards/accuracy_reward": 0.50966796875,
"rewards/brier_reward": 0.810984981060028,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002536864671856165,
"rewards/frontier_coverage_1": 0.14460064321756363,
"rewards/frontier_coverage_10": 0.14430496394634246,
"rewards/frontier_coverage_15": 0.13622619807720185,
"rewards/frontier_coverage_20": 0.0965993657708168,
"rewards/frontier_coverage_25": 0.05859274864196777,
"rewards/frontier_coverage_5": 0.14456866830587387,
"rewards/frontier_ece_reward": 0.002746673859655857,
"rewards/frontier_entropy_batch_reward": -0.20547258853912354,
"signal/accuracy_reward/centered_abs_mean": 0.081915283203125,
"signal/accuracy_reward/group_bin_occupancy": 0.166015625,
"signal/accuracy_reward/group_std_mean": 0.11158370226621628,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409576416015625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0409576416015625,
"signal/advantage_abs_mean": 0.06546520814299583,
"signal/advantage_pre_scale_abs_mean": 0.06546520814299583,
"signal/advantage_pre_scale_std": 0.10177824050188064,
"signal/advantage_std": 0.10177824050188064,
"signal/brier_reward/centered_abs_mean": 0.10417567193508148,
"signal/brier_reward/group_bin_occupancy": 0.845703125,
"signal/brier_reward/group_std_mean": 0.13512639403343202,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013021958991885185,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013021958991885185,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020107618300244214,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.72890625,
"signal/frontier_aurc_reward/group_std_mean": 0.003275436395779252,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.599263691285159e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.599263691285159e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14825652539730072,
"signal/frontier_coverage_1/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_1/group_std_mean": 0.19205498099327087,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002653791708871722,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002653791708871722,
"signal/frontier_coverage_10/centered_abs_mean": 0.14757258892059327,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_10/group_std_mean": 0.19117499589920045,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002641549287363887,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002641549287363887,
"signal/frontier_coverage_15/centered_abs_mean": 0.13492438793182374,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_15/group_std_mean": 0.1748009592294693,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024151464458554983,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024151464458554983,
"signal/frontier_coverage_20/centered_abs_mean": 0.0868727594614029,
"signal/frontier_coverage_20/group_bin_occupancy": 0.88203125,
"signal/frontier_coverage_20/group_std_mean": 0.11287190318107605,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015550222946330906,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015550222946330906,
"signal/frontier_coverage_25/centered_abs_mean": 0.05342138335108757,
"signal/frontier_coverage_25/group_bin_occupancy": 0.926171875,
"signal/frontier_coverage_25/group_std_mean": 0.06843771934509277,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009562427527271211,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009562427527271211,
"signal/frontier_coverage_5/centered_abs_mean": 0.1482018768787384,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_5/group_std_mean": 0.1919853150844574,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026528135407716037,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026528135407716037,
"signal/frontier_ece_reward/centered_abs_mean": 0.004641291126608849,
"signal/frontier_ece_reward/group_bin_occupancy": 0.895703125,
"signal/frontier_ece_reward/group_std_mean": 0.006130393128842115,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005801613908261061,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005801613908261061,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2680731534957886,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.72890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34347763657569885,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03350914418697357,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03350914418697357,
"step": 240
},
{
"calibration/aurc": 0.3410998255186805,
"calibration/batch_distribution_entropy": 0.98266861992294,
"calibration/batch_entropy_100bins": 0.9725637560170315,
"calibration/batch_entropy_10bins": 0.98266861992294,
"calibration/batch_entropy_50bins": 0.9805054265093982,
"calibration/batch_uniqueness": 0.9535858154296875,
"calibration/buffer_distribution_entropy": 0.9991640665932143,
"calibration/buffer_entropy_100bins": 0.9990458872496693,
"calibration/buffer_entropy_10bins": 0.9991640665932143,
"calibration/buffer_entropy_50bins": 0.9991495337630607,
"calibration/confidence_entropy": 0.47953550982848137,
"calibration/coverage@0%": 0.02890625,
"calibration/coverage@1%": 0.04296875,
"calibration/coverage@10%": 0.2109375,
"calibration/coverage@15%": 0.2875,
"calibration/coverage@20%": 0.32265625,
"calibration/coverage@25%": 0.34765625,
"calibration/coverage@30%": 0.365625,
"calibration/coverage@5%": 0.098828125,
"calibration/ece": 0.16965724304332572,
"calibration/mean_confidence": 0.4929179841997679,
"calibration/prompt_uniqueness": 0.83154296875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 863.4,
"completions/max_terminated_length": 431.8,
"completions/mean_length": 197.94599609375,
"completions/mean_terminated_length": 197.5545166015625,
"completions/min_length": 103.2,
"completions/min_terminated_length": 103.2,
"epoch": 0.784,
"grad_norm": 0.0008992942166514695,
"learning_rate": 1e-06,
"loss": 0.0007,
"num_tokens": 827296568.0,
"reward": 0.860960865020752,
"reward_std": 0.08637812584638596,
"rewards/accuracy_reward": 0.55595703125,
"rewards/brier_reward": 0.7818328976631165,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_aurc_reward": -0.002828824752941728,
"rewards/frontier_coverage_1": 0.08855738416314125,
"rewards/frontier_coverage_10": 0.08812275156378746,
"rewards/frontier_coverage_15": 0.08236327841877937,
"rewards/frontier_coverage_20": 0.059821216762065886,
"rewards/frontier_coverage_25": 0.043670283257961275,
"rewards/frontier_coverage_5": 0.08851732909679413,
"rewards/frontier_ece_reward": 0.0014453153213253244,
"rewards/frontier_entropy_batch_reward": -0.18243320286273956,
"signal/accuracy_reward/centered_abs_mean": 0.097711181640625,
"signal/accuracy_reward/group_bin_occupancy": 0.169140625,
"signal/accuracy_reward/group_std_mean": 0.12649333626031875,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0488555908203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0488555908203125,
"signal/advantage_abs_mean": 0.06811224520206452,
"signal/advantage_pre_scale_abs_mean": 0.06811224520206452,
"signal/advantage_pre_scale_std": 0.10434643775224686,
"signal/advantage_std": 0.10434643775224686,
"signal/brier_reward/centered_abs_mean": 0.11582219302654266,
"signal/brier_reward/group_bin_occupancy": 0.84453125,
"signal/brier_reward/group_std_mean": 0.14920888543128968,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014477774128317833,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014477774128317833,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_bin_occupancy": 0.126171875,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_aurc_reward/centered_abs_mean": 0.00234157289378345,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.741796875,
"signal/frontier_aurc_reward/group_std_mean": 0.00370767368003726,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.19141557358671e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.19141557358671e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1641725480556488,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_1/group_std_mean": 0.21050458252429963,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002938688499853015,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002938688499853015,
"signal/frontier_coverage_10/centered_abs_mean": 0.16328320205211638,
"signal/frontier_coverage_10/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_10/group_std_mean": 0.20939250588417052,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029227692633867265,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029227692633867265,
"signal/frontier_coverage_15/centered_abs_mean": 0.1482144132256508,
"signal/frontier_coverage_15/group_bin_occupancy": 0.848828125,
"signal/frontier_coverage_15/group_std_mean": 0.1904875546693802,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002653037803247571,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002653037803247571,
"signal/frontier_coverage_20/centered_abs_mean": 0.09080570191144943,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_20/group_std_mean": 0.11784365773200989,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016254220623522996,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016254220623522996,
"signal/frontier_coverage_25/centered_abs_mean": 0.05480258762836456,
"signal/frontier_coverage_25/group_bin_occupancy": 0.91015625,
"signal/frontier_coverage_25/group_std_mean": 0.07086438089609146,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000980966305360198,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000980966305360198,
"signal/frontier_coverage_5/centered_abs_mean": 0.16410693824291228,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_5/group_std_mean": 0.21042270064353943,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029375139623880387,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029375139623880387,
"signal/frontier_ece_reward/centered_abs_mean": 0.004935114085674286,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89140625,
"signal/frontier_ece_reward/group_std_mean": 0.006519688945263624,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006168892607092858,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006168892607092858,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.24725628197193145,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.737109375,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3199459671974182,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03090703524649143,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03090703524649143,
"step": 245
},
{
"calibration/aurc": 0.1940784264652428,
"calibration/batch_distribution_entropy": 0.9770675388500825,
"calibration/batch_entropy_100bins": 0.9662281299714202,
"calibration/batch_entropy_10bins": 0.9770675388500825,
"calibration/batch_entropy_50bins": 0.9742705536997672,
"calibration/batch_uniqueness": 0.9523101806640625,
"calibration/buffer_distribution_entropy": 0.999172449019247,
"calibration/buffer_entropy_100bins": 0.9990648204449872,
"calibration/buffer_entropy_10bins": 0.999172449019247,
"calibration/buffer_entropy_50bins": 0.9991687135590434,
"calibration/confidence_entropy": 0.4846538428188453,
"calibration/coverage@0%": 0.0546875,
"calibration/coverage@1%": 0.0546875,
"calibration/coverage@10%": 0.346484375,
"calibration/coverage@15%": 0.489453125,
"calibration/coverage@20%": 0.575,
"calibration/coverage@25%": 0.65234375,
"calibration/coverage@30%": 0.7375,
"calibration/coverage@5%": 0.1671875,
"calibration/ece": 0.10917596226614659,
"calibration/mean_confidence": 0.4966383387592807,
"calibration/prompt_uniqueness": 0.8314453125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 653.8,
"completions/max_terminated_length": 424.2,
"completions/mean_length": 195.6087890625,
"completions/mean_terminated_length": 195.47777709960937,
"completions/min_length": 104.2,
"completions/min_terminated_length": 104.2,
"epoch": 0.8,
"grad_norm": 0.0010258163092657924,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 844310162.0,
"reward": 0.8748032927513123,
"reward_std": 0.08496512919664383,
"rewards/accuracy_reward": 0.58251953125,
"rewards/brier_reward": 0.8144230008125305,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002836526231840253,
"rewards/frontier_coverage_1": 0.10128066837787628,
"rewards/frontier_coverage_10": 0.10073070526123047,
"rewards/frontier_coverage_15": 0.09388678222894668,
"rewards/frontier_coverage_20": 0.06677651032805443,
"rewards/frontier_coverage_25": 0.05463530197739601,
"rewards/frontier_coverage_5": 0.10115833282470703,
"rewards/frontier_ece_reward": 0.002541623217985034,
"rewards/frontier_entropy_batch_reward": -0.2220643639564514,
"signal/accuracy_reward/centered_abs_mean": 0.084588623046875,
"signal/accuracy_reward/group_bin_occupancy": 0.16484375,
"signal/accuracy_reward/group_std_mean": 0.11226904094219207,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0422943115234375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422943115234375,
"signal/advantage_abs_mean": 0.06675532534718513,
"signal/advantage_pre_scale_abs_mean": 0.06675532534718513,
"signal/advantage_pre_scale_std": 0.10448435842990875,
"signal/advantage_std": 0.10448435842990875,
"signal/brier_reward/centered_abs_mean": 0.10213624089956283,
"signal/brier_reward/group_bin_occupancy": 0.83828125,
"signal/brier_reward/group_std_mean": 0.1328400731086731,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012767030112445354,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012767030112445354,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002544344821944833,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.728125,
"signal/frontier_aurc_reward/group_std_mean": 0.003990656137466431,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.554377155727707e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.554377155727707e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1359323427081108,
"signal/frontier_coverage_1/group_bin_occupancy": 0.841796875,
"signal/frontier_coverage_1/group_std_mean": 0.17886653840541838,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002433188818395138,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002433188818395138,
"signal/frontier_coverage_10/centered_abs_mean": 0.1348055586218834,
"signal/frontier_coverage_10/group_bin_occupancy": 0.841796875,
"signal/frontier_coverage_10/group_std_mean": 0.17739444077014924,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024130194447934627,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024130194447934627,
"signal/frontier_coverage_15/centered_abs_mean": 0.1202843114733696,
"signal/frontier_coverage_15/group_bin_occupancy": 0.83515625,
"signal/frontier_coverage_15/group_std_mean": 0.15877383649349214,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021530891302973033,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021530891302973033,
"signal/frontier_coverage_20/centered_abs_mean": 0.0718239963054657,
"signal/frontier_coverage_20/group_bin_occupancy": 0.85625,
"signal/frontier_coverage_20/group_std_mean": 0.09572341293096542,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012856494868174195,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012856494868174195,
"signal/frontier_coverage_25/centered_abs_mean": 0.051678837090730664,
"signal/frontier_coverage_25/group_bin_occupancy": 0.923046875,
"signal/frontier_coverage_25/group_std_mean": 0.06689032912254333,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009250511298887432,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009250511298887432,
"signal/frontier_coverage_5/centered_abs_mean": 0.13572666347026824,
"signal/frontier_coverage_5/group_bin_occupancy": 0.842578125,
"signal/frontier_coverage_5/group_std_mean": 0.17859258353710175,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024295071605592968,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024295071605592968,
"signal/frontier_ece_reward/centered_abs_mean": 0.005082287080585957,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8890625,
"signal/frontier_ece_reward/group_std_mean": 0.006694659031927586,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006352858850732446,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006352858850732446,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2741507351398468,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73828125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34453503489494325,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03426884189248085,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03426884189248085,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.4024781794204561,
"eval_calibration/batch_distribution_entropy": 0.9411803508728845,
"eval_calibration/batch_entropy_100bins": 0.722578085023462,
"eval_calibration/batch_entropy_10bins": 0.9411803508728845,
"eval_calibration/batch_entropy_50bins": 0.798684377132812,
"eval_calibration/batch_uniqueness": 0.8974609375,
"eval_calibration/buffer_distribution_entropy": 0.9992212146130883,
"eval_calibration/buffer_entropy_100bins": 0.9990995964084779,
"eval_calibration/buffer_entropy_10bins": 0.9992212146130883,
"eval_calibration/buffer_entropy_50bins": 0.9992052314450008,
"eval_calibration/confidence_entropy": 0.4720443419968122,
"eval_calibration/coverage@0%": 0.0703125,
"eval_calibration/coverage@1%": 0.0703125,
"eval_calibration/coverage@10%": 0.0703125,
"eval_calibration/coverage@15%": 0.203125,
"eval_calibration/coverage@20%": 0.2421875,
"eval_calibration/coverage@25%": 0.3125,
"eval_calibration/coverage@30%": 0.34375,
"eval_calibration/coverage@5%": 0.0703125,
"eval_calibration/ece": 0.17127804387863715,
"eval_calibration/mean_confidence": 0.4861035242434224,
"eval_calibration/prompt_uniqueness": 0.8974609375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 359.75,
"eval_completions/max_terminated_length": 359.75,
"eval_completions/mean_length": 197.09428787231445,
"eval_completions/mean_terminated_length": 197.09428787231445,
"eval_completions/min_length": 126.0,
"eval_completions/min_terminated_length": 126.0,
"eval_loss": 0.0,
"eval_num_tokens": 844310162.0,
"eval_reward": 0.7075212150812149,
"eval_reward_std": 0.23280686885118484,
"eval_rewards/accuracy_reward": 0.4296875,
"eval_rewards/brier_reward": 0.8027084320783615,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.0038928079302422702,
"eval_rewards/frontier_coverage_1": 0.200554970651865,
"eval_rewards/frontier_coverage_10": 0.19824624806642532,
"eval_rewards/frontier_coverage_15": 0.17718525603413582,
"eval_rewards/frontier_coverage_20": 0.10994750820100307,
"eval_rewards/frontier_coverage_25": 0.06260389927774668,
"eval_rewards/frontier_coverage_5": 0.19991321116685867,
"eval_rewards/frontier_ece_reward": 0.003450465912465006,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 19.3003,
"eval_samples_per_second": 25.906,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4765625,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.49562519043684006,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23828125,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23828125,
"eval_signal/advantage_abs_mean": 0.21785810217261314,
"eval_signal/advantage_pre_scale_abs_mean": 0.21785810217261314,
"eval_signal/advantage_pre_scale_std": 0.23014385625720024,
"eval_signal/advantage_std": 0.23014385625720024,
"eval_signal/brier_reward/centered_abs_mean": 0.1881571188569069,
"eval_signal/brier_reward/group_bin_occupancy": 0.875,
"eval_signal/brier_reward/group_std_mean": 0.23912940546870232,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02351963985711336,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02351963985711336,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.005338445422239602,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.6328125,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.009567599976435304,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 9.555817086948082e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 9.555817086948082e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.348846860229969,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_1/group_std_mean": 0.42089004069566727,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.006244358723051846,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.006244358723051846,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3454489931464195,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4168899804353714,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006183536606840789,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.006183536606840789,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.30945945531129837,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_coverage_15/group_std_mean": 0.37479550391435623,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005539324251003563,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005539324251003563,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1743907555937767,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_20/group_std_mean": 0.2171802930533886,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031215944909490645,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031215944909490645,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0898975171148777,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9296875,
"eval_signal/frontier_coverage_25/group_std_mean": 0.11114342510700226,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016091655124910176,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016091655124910176,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.34793129563331604,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.96875,
"eval_signal/frontier_coverage_5/group_std_mean": 0.41981156170368195,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006227970006875694,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006227970006875694,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006496628629975021,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.9609375,
"eval_signal/frontier_ece_reward/group_std_mean": 0.008258524350821972,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0008120785787468776,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0008120785787468776,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.207,
"step": 250
},
{
"calibration/aurc": 0.20885883826229318,
"calibration/batch_distribution_entropy": 0.9617354323768474,
"calibration/batch_entropy_100bins": 0.9607130781173305,
"calibration/batch_entropy_10bins": 0.9617354323768474,
"calibration/batch_entropy_50bins": 0.9659121941892688,
"calibration/batch_uniqueness": 0.9494836297596694,
"calibration/buffer_distribution_entropy": 0.999124191511771,
"calibration/buffer_entropy_100bins": 0.999041988564324,
"calibration/buffer_entropy_10bins": 0.999124191511771,
"calibration/buffer_entropy_50bins": 0.9991497628306819,
"calibration/confidence_entropy": 0.46736913978754613,
"calibration/coverage@0%": 0.043359375,
"calibration/coverage@1%": 0.043359375,
"calibration/coverage@10%": 0.171484375,
"calibration/coverage@15%": 0.28758408757338555,
"calibration/coverage@20%": 0.557930987035225,
"calibration/coverage@25%": 0.7530630809686889,
"calibration/coverage@30%": 0.8226279659980431,
"calibration/coverage@5%": 0.102734375,
"calibration/ece": 0.14049068595229333,
"calibration/mean_confidence": 0.523476685252264,
"calibration/prompt_uniqueness": 0.8333644458084027,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 467.6,
"completions/max_terminated_length": 467.6,
"completions/mean_length": 194.86865234375,
"completions/mean_terminated_length": 194.86865234375,
"completions/min_length": 103.8,
"completions/min_terminated_length": 103.8,
"epoch": 0.816,
"grad_norm": 0.001114765414968133,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 861404785.0,
"reward": 0.8725868582725524,
"reward_std": 0.08807137310504913,
"rewards/accuracy_reward": 0.5794921875,
"rewards/brier_reward": 0.8018252968788147,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002854443807154894,
"rewards/frontier_coverage_1": 0.08887268304824829,
"rewards/frontier_coverage_10": 0.08834582418203354,
"rewards/frontier_coverage_15": 0.08466917127370835,
"rewards/frontier_coverage_20": 0.060265733301639555,
"rewards/frontier_coverage_25": 0.051320061832666394,
"rewards/frontier_coverage_5": 0.08884882032871247,
"rewards/frontier_ece_reward": 0.0020072998944669963,
"rewards/frontier_entropy_batch_reward": -0.2065118134021759,
"signal/accuracy_reward/centered_abs_mean": 0.09168701171875,
"signal/accuracy_reward/group_bin_occupancy": 0.170703125,
"signal/accuracy_reward/group_std_mean": 0.12381611913442611,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.045843505859375,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.045843505859375,
"signal/advantage_abs_mean": 0.06842098832130432,
"signal/advantage_pre_scale_abs_mean": 0.06842098832130432,
"signal/advantage_pre_scale_std": 0.10695004910230636,
"signal/advantage_std": 0.10695004910230636,
"signal/brier_reward/centered_abs_mean": 0.10891520380973815,
"signal/brier_reward/group_bin_occupancy": 0.833203125,
"signal/brier_reward/group_std_mean": 0.14026750177145003,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01361440047621727,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01361440047621727,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002719699405133724,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.71875,
"signal/frontier_aurc_reward/group_std_mean": 0.00452100308611989,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.8682617489248514e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.8682617489248514e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14558494091033936,
"signal/frontier_coverage_1/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_1/group_std_mean": 0.18775410056114197,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026059703435748816,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026059703435748816,
"signal/frontier_coverage_10/centered_abs_mean": 0.14347892701625825,
"signal/frontier_coverage_10/group_bin_occupancy": 0.858203125,
"signal/frontier_coverage_10/group_std_mean": 0.1850330114364624,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025682727340608836,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025682727340608836,
"signal/frontier_coverage_15/centered_abs_mean": 0.12778309732675552,
"signal/frontier_coverage_15/group_bin_occupancy": 0.85,
"signal/frontier_coverage_15/group_std_mean": 0.16491487622261047,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002287317393347621,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002287317393347621,
"signal/frontier_coverage_20/centered_abs_mean": 0.07579994648694992,
"signal/frontier_coverage_20/group_bin_occupancy": 0.884375,
"signal/frontier_coverage_20/group_std_mean": 0.09820334166288376,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013568190392106772,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013568190392106772,
"signal/frontier_coverage_25/centered_abs_mean": 0.05239018201828003,
"signal/frontier_coverage_25/group_bin_occupancy": 0.92109375,
"signal/frontier_coverage_25/group_std_mean": 0.06699474751949311,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009377842419780791,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009377842419780791,
"signal/frontier_coverage_5/centered_abs_mean": 0.14545360803604127,
"signal/frontier_coverage_5/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_5/group_std_mean": 0.18758580982685089,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002603619499132037,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002603619499132037,
"signal/frontier_ece_reward/centered_abs_mean": 0.004751469660550356,
"signal/frontier_ece_reward/group_bin_occupancy": 0.9015625,
"signal/frontier_ece_reward/group_std_mean": 0.006205685343593359,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005939337075687945,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005939337075687945,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.265206840634346,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.730078125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33529953956604003,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03315085507929325,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03315085507929325,
"step": 255
},
{
"calibration/aurc": 0.26691739801367803,
"calibration/batch_distribution_entropy": 0.9774445841742269,
"calibration/batch_entropy_100bins": 0.9696266513555238,
"calibration/batch_entropy_10bins": 0.9774445841742269,
"calibration/batch_entropy_50bins": 0.9781202652767531,
"calibration/batch_uniqueness": 0.9535858154296875,
"calibration/buffer_distribution_entropy": 0.9989725991598203,
"calibration/buffer_entropy_100bins": 0.9989539238306406,
"calibration/buffer_entropy_10bins": 0.9989725991598203,
"calibration/buffer_entropy_50bins": 0.9990613562739397,
"calibration/confidence_entropy": 0.4988719882240712,
"calibration/coverage@0%": 0.03828125,
"calibration/coverage@1%": 0.03828125,
"calibration/coverage@10%": 0.25078125,
"calibration/coverage@15%": 0.294921875,
"calibration/coverage@20%": 0.372265625,
"calibration/coverage@25%": 0.455859375,
"calibration/coverage@30%": 0.5828125,
"calibration/coverage@5%": 0.15078125,
"calibration/ece": 0.10967759805603246,
"calibration/mean_confidence": 0.48722149702527523,
"calibration/prompt_uniqueness": 0.846044921875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 442.4,
"completions/max_terminated_length": 442.4,
"completions/mean_length": 196.14306640625,
"completions/mean_terminated_length": 196.14306640625,
"completions/min_length": 103.2,
"completions/min_terminated_length": 103.2,
"epoch": 0.832,
"grad_norm": 0.0010814516572281718,
"learning_rate": 1e-06,
"loss": -0.0001,
"num_tokens": 878421642.0,
"reward": 0.8648443460464478,
"reward_std": 0.08550989478826523,
"rewards/accuracy_reward": 0.55498046875,
"rewards/brier_reward": 0.8163813591003418,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0023734794463962316,
"rewards/frontier_coverage_1": 0.11868036091327668,
"rewards/frontier_coverage_10": 0.11617294400930404,
"rewards/frontier_coverage_15": 0.10371433347463607,
"rewards/frontier_coverage_20": 0.07033977434039115,
"rewards/frontier_coverage_25": 0.0553276963531971,
"rewards/frontier_coverage_5": 0.11858219057321548,
"rewards/frontier_ece_reward": 0.0023112162714824082,
"rewards/frontier_entropy_batch_reward": -0.20258863270282745,
"signal/accuracy_reward/centered_abs_mean": 0.090997314453125,
"signal/accuracy_reward/group_bin_occupancy": 0.169140625,
"signal/accuracy_reward/group_std_mean": 0.12111333757638931,
"signal/accuracy_reward/group_zero_std_frac": 0.646875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0454986572265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0454986572265625,
"signal/advantage_abs_mean": 0.06704051047563553,
"signal/advantage_pre_scale_abs_mean": 0.06704051047563553,
"signal/advantage_pre_scale_std": 0.10539929419755936,
"signal/advantage_std": 0.10539929419755936,
"signal/brier_reward/centered_abs_mean": 0.10057551860809326,
"signal/brier_reward/group_bin_occupancy": 0.841796875,
"signal/brier_reward/group_std_mean": 0.13067585229873657,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012571939826011657,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012571939826011657,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020601370837539435,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.734765625,
"signal/frontier_aurc_reward/group_std_mean": 0.0032557172700762747,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.687645366881043e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.687645366881043e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14384538531303406,
"signal/frontier_coverage_1/group_bin_occupancy": 0.858984375,
"signal/frontier_coverage_1/group_std_mean": 0.18559444546699524,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025748323649168016,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025748323649168016,
"signal/frontier_coverage_10/centered_abs_mean": 0.14038788378238679,
"signal/frontier_coverage_10/group_bin_occupancy": 0.85859375,
"signal/frontier_coverage_10/group_std_mean": 0.18115375339984893,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025129430461674927,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025129430461674927,
"signal/frontier_coverage_15/centered_abs_mean": 0.12292735427618026,
"signal/frontier_coverage_15/group_bin_occupancy": 0.855859375,
"signal/frontier_coverage_15/group_std_mean": 0.15857858061790467,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022003995720297096,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022003995720297096,
"signal/frontier_coverage_20/centered_abs_mean": 0.07247701585292816,
"signal/frontier_coverage_20/group_bin_occupancy": 0.891015625,
"signal/frontier_coverage_20/group_std_mean": 0.09377783834934235,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012973385397344827,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012973385397344827,
"signal/frontier_coverage_25/centered_abs_mean": 0.05139257907867432,
"signal/frontier_coverage_25/group_bin_occupancy": 0.932421875,
"signal/frontier_coverage_25/group_std_mean": 0.06542427986860275,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000919927132781595,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000919927132781595,
"signal/frontier_coverage_5/centered_abs_mean": 0.14371106922626495,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85859375,
"signal/frontier_coverage_5/group_std_mean": 0.1854223281145096,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025724280625581742,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025724280625581742,
"signal/frontier_ece_reward/centered_abs_mean": 0.004667305201292038,
"signal/frontier_ece_reward/group_bin_occupancy": 0.894921875,
"signal/frontier_ece_reward/group_std_mean": 0.006094491388648748,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005834131501615047,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005834131501615047,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2624901086091995,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.7328125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3327677011489868,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03281126357614994,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03281126357614994,
"step": 260
},
{
"calibration/aurc": 0.31542229607064637,
"calibration/batch_distribution_entropy": 0.9739707310173376,
"calibration/batch_entropy_100bins": 0.9635504948025592,
"calibration/batch_entropy_10bins": 0.9739707310173376,
"calibration/batch_entropy_50bins": 0.9730236720183898,
"calibration/batch_uniqueness": 0.951666259765625,
"calibration/buffer_distribution_entropy": 0.9989930255604248,
"calibration/buffer_entropy_100bins": 0.9989505577332609,
"calibration/buffer_entropy_10bins": 0.9989930255604248,
"calibration/buffer_entropy_50bins": 0.999068960960001,
"calibration/confidence_entropy": 0.5086728375442844,
"calibration/coverage@0%": 0.0296875,
"calibration/coverage@1%": 0.0296875,
"calibration/coverage@10%": 0.120703125,
"calibration/coverage@15%": 0.2734375,
"calibration/coverage@20%": 0.417578125,
"calibration/coverage@25%": 0.491796875,
"calibration/coverage@30%": 0.5453125,
"calibration/coverage@5%": 0.102734375,
"calibration/ece": 0.1579181461275594,
"calibration/mean_confidence": 0.5313260932295083,
"calibration/prompt_uniqueness": 0.837255859375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 421.6,
"completions/max_terminated_length": 421.6,
"completions/mean_length": 194.3615234375,
"completions/mean_terminated_length": 194.3615234375,
"completions/min_length": 101.4,
"completions/min_terminated_length": 101.4,
"epoch": 0.848,
"grad_norm": 0.0008651363314129412,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 895426272.0,
"reward": 0.8523078799247742,
"reward_std": 0.08377386629581451,
"rewards/accuracy_reward": 0.53427734375,
"rewards/brier_reward": 0.8082629799842834,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.002553269104100764,
"rewards/frontier_coverage_1": 0.11806271076202393,
"rewards/frontier_coverage_10": 0.11662331819534302,
"rewards/frontier_coverage_15": 0.10539236664772034,
"rewards/frontier_coverage_20": 0.06768357157707214,
"rewards/frontier_coverage_25": 0.04773269593715668,
"rewards/frontier_coverage_5": 0.1179862841963768,
"rewards/frontier_ece_reward": 0.0020762649830430744,
"rewards/frontier_entropy_batch_reward": -0.21074254512786866,
"signal/accuracy_reward/centered_abs_mean": 0.076470947265625,
"signal/accuracy_reward/group_bin_occupancy": 0.165625,
"signal/accuracy_reward/group_std_mean": 0.1066226527094841,
"signal/accuracy_reward/group_zero_std_frac": 0.675,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0382354736328125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0382354736328125,
"signal/advantage_abs_mean": 0.06452079713344575,
"signal/advantage_pre_scale_abs_mean": 0.06452079713344575,
"signal/advantage_pre_scale_std": 0.10078646242618561,
"signal/advantage_std": 0.10078646242618561,
"signal/brier_reward/centered_abs_mean": 0.095879465341568,
"signal/brier_reward/group_bin_occupancy": 0.86015625,
"signal/brier_reward/group_std_mean": 0.12415737211704254,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011984933167696,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.011984933167696,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002056886232458055,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73671875,
"signal/frontier_aurc_reward/group_std_mean": 0.0032547391252592205,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.681826237880159e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.681826237880159e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.13201110661029816,
"signal/frontier_coverage_1/group_bin_occupancy": 0.875,
"signal/frontier_coverage_1/group_std_mean": 0.1731318861246109,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023629988078027963,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023629988078027963,
"signal/frontier_coverage_10/centered_abs_mean": 0.12971103489398955,
"signal/frontier_coverage_10/group_bin_occupancy": 0.872265625,
"signal/frontier_coverage_10/group_std_mean": 0.17014427483081818,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00232182745821774,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00232182745821774,
"signal/frontier_coverage_15/centered_abs_mean": 0.11497683823108673,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_15/group_std_mean": 0.15072887837886811,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002058085426688194,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002058085426688194,
"signal/frontier_coverage_20/centered_abs_mean": 0.06714669689536094,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9,
"signal/frontier_coverage_20/group_std_mean": 0.08797992616891862,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001201925822533667,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001201925822533667,
"signal/frontier_coverage_25/centered_abs_mean": 0.04629442393779755,
"signal/frontier_coverage_25/group_bin_occupancy": 0.921484375,
"signal/frontier_coverage_25/group_std_mean": 0.059931250661611556,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008286701398901641,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008286701398901641,
"signal/frontier_coverage_5/centered_abs_mean": 0.1318788543343544,
"signal/frontier_coverage_5/group_bin_occupancy": 0.875,
"signal/frontier_coverage_5/group_std_mean": 0.17296003997325898,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0023606313858181237,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023606313858181237,
"signal/frontier_ece_reward/centered_abs_mean": 0.004494541138410568,
"signal/frontier_ece_reward/group_bin_occupancy": 0.894140625,
"signal/frontier_ece_reward/group_std_mean": 0.0058809550479054454,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.000561817642301321,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.000561817642301321,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.279247921705246,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.741015625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3494983911514282,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03490599021315575,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03490599021315575,
"step": 265
},
{
"calibration/aurc": 0.2551935935751549,
"calibration/batch_distribution_entropy": 0.9551683899262354,
"calibration/batch_entropy_100bins": 0.9561740490407103,
"calibration/batch_entropy_10bins": 0.9551683899262354,
"calibration/batch_entropy_50bins": 0.9628317632115422,
"calibration/batch_uniqueness": 0.9490325927734375,
"calibration/buffer_distribution_entropy": 0.9990927517675319,
"calibration/buffer_entropy_100bins": 0.9990242562445925,
"calibration/buffer_entropy_10bins": 0.9990927517675319,
"calibration/buffer_entropy_50bins": 0.9991437917105215,
"calibration/confidence_entropy": 0.49864896977334033,
"calibration/coverage@0%": 0.034375,
"calibration/coverage@1%": 0.034375,
"calibration/coverage@10%": 0.194921875,
"calibration/coverage@15%": 0.2421875,
"calibration/coverage@20%": 0.34765625,
"calibration/coverage@25%": 0.444140625,
"calibration/coverage@30%": 0.575,
"calibration/coverage@5%": 0.108203125,
"calibration/ece": 0.12198031278700468,
"calibration/mean_confidence": 0.5995220320808029,
"calibration/prompt_uniqueness": 0.8458984375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 647.8,
"completions/max_terminated_length": 428.6,
"completions/mean_length": 196.71005859375,
"completions/mean_terminated_length": 196.57922973632813,
"completions/min_length": 105.8,
"completions/min_terminated_length": 105.8,
"epoch": 0.864,
"grad_norm": 0.0012777691008523107,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 912427399.0,
"reward": 0.8713708758354187,
"reward_std": 0.09100723564624787,
"rewards/accuracy_reward": 0.587109375,
"rewards/brier_reward": 0.8016261577606201,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0026119566056877373,
"rewards/frontier_coverage_1": 0.07935620397329331,
"rewards/frontier_coverage_10": 0.07880553454160691,
"rewards/frontier_coverage_15": 0.07368464544415473,
"rewards/frontier_coverage_20": 0.05397990569472313,
"rewards/frontier_coverage_25": 0.04986085593700409,
"rewards/frontier_coverage_5": 0.07933037877082824,
"rewards/frontier_ece_reward": 0.0016980181448161603,
"rewards/frontier_entropy_batch_reward": -0.23946044743061065,
"signal/accuracy_reward/centered_abs_mean": 0.0991943359375,
"signal/accuracy_reward/group_bin_occupancy": 0.16953125,
"signal/accuracy_reward/group_std_mean": 0.12835633456707002,
"signal/accuracy_reward/group_zero_std_frac": 0.64375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04959716796875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04959716796875,
"signal/advantage_abs_mean": 0.07214201688766479,
"signal/advantage_pre_scale_abs_mean": 0.07214201688766479,
"signal/advantage_pre_scale_std": 0.10875225216150283,
"signal/advantage_std": 0.10875225216150283,
"signal/brier_reward/centered_abs_mean": 0.10874636620283126,
"signal/brier_reward/group_bin_occupancy": 0.8484375,
"signal/brier_reward/group_std_mean": 0.13965383768081666,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013593295775353908,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013593295775353908,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023859881330281496,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73984375,
"signal/frontier_aurc_reward/group_std_mean": 0.003907088562846184,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.270918434485793e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.270918434485793e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15109747648239136,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8625,
"signal/frontier_coverage_1/group_std_mean": 0.19283765852451323,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00270464476197958,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00270464476197958,
"signal/frontier_coverage_10/centered_abs_mean": 0.149289670586586,
"signal/frontier_coverage_10/group_bin_occupancy": 0.86171875,
"signal/frontier_coverage_10/group_std_mean": 0.19051893651485444,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002672284934669733,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002672284934669733,
"signal/frontier_coverage_15/centered_abs_mean": 0.13000792711973191,
"signal/frontier_coverage_15/group_bin_occupancy": 0.851953125,
"signal/frontier_coverage_15/group_std_mean": 0.16591603457927703,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023271418176591396,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023271418176591396,
"signal/frontier_coverage_20/centered_abs_mean": 0.07444732487201691,
"signal/frontier_coverage_20/group_bin_occupancy": 0.896875,
"signal/frontier_coverage_20/group_std_mean": 0.09547023475170135,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013326070504263044,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013326070504263044,
"signal/frontier_coverage_25/centered_abs_mean": 0.05278872922062874,
"signal/frontier_coverage_25/group_bin_occupancy": 0.923046875,
"signal/frontier_coverage_25/group_std_mean": 0.06748096346855163,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009449182078242302,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009449182078242302,
"signal/frontier_coverage_5/centered_abs_mean": 0.15094164311885833,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8625,
"signal/frontier_coverage_5/group_std_mean": 0.19263845980167388,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002701855357736349,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002701855357736349,
"signal/frontier_ece_reward/centered_abs_mean": 0.004699286818504333,
"signal/frontier_ece_reward/group_bin_occupancy": 0.900390625,
"signal/frontier_ece_reward/group_std_mean": 0.006102659367024898,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005874108523130417,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005874108523130417,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29436487555503843,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726171875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3652419447898865,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.036795609444379804,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036795609444379804,
"step": 270
},
{
"calibration/aurc": 0.3689968283562816,
"calibration/batch_distribution_entropy": 0.9816647884171952,
"calibration/batch_entropy_100bins": 0.9690518445136315,
"calibration/batch_entropy_10bins": 0.9816647884171952,
"calibration/batch_entropy_50bins": 0.9786919422206465,
"calibration/batch_uniqueness": 0.9536102294921875,
"calibration/buffer_distribution_entropy": 0.9991432971050402,
"calibration/buffer_entropy_100bins": 0.9990850763393919,
"calibration/buffer_entropy_10bins": 0.9991432971050402,
"calibration/buffer_entropy_50bins": 0.9991792049124377,
"calibration/confidence_entropy": 0.47520039428209443,
"calibration/coverage@0%": 0.005859375,
"calibration/coverage@1%": 0.005859375,
"calibration/coverage@10%": 0.0296875,
"calibration/coverage@15%": 0.096484375,
"calibration/coverage@20%": 0.146875,
"calibration/coverage@25%": 0.214453125,
"calibration/coverage@30%": 0.278515625,
"calibration/coverage@5%": 0.02265625,
"calibration/ece": 0.1265384011632356,
"calibration/mean_confidence": 0.5028521730859784,
"calibration/prompt_uniqueness": 0.831884765625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 452.8,
"completions/max_terminated_length": 452.8,
"completions/mean_length": 191.644921875,
"completions/mean_terminated_length": 191.644921875,
"completions/min_length": 97.6,
"completions/min_terminated_length": 97.6,
"epoch": 0.88,
"grad_norm": 0.0009742515976540744,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 929536915.0,
"reward": 0.8399426817893982,
"reward_std": 0.09036057144403457,
"rewards/accuracy_reward": 0.51142578125,
"rewards/brier_reward": 0.7988796353340148,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.003130771638825536,
"rewards/frontier_coverage_1": 0.13325000703334808,
"rewards/frontier_coverage_10": 0.13197922110557556,
"rewards/frontier_coverage_15": 0.11483763456344605,
"rewards/frontier_coverage_20": 0.07162886634469032,
"rewards/frontier_coverage_25": 0.05139811635017395,
"rewards/frontier_coverage_5": 0.13309186547994614,
"rewards/frontier_ece_reward": 0.0023863946786150335,
"rewards/frontier_entropy_batch_reward": -0.21808099746704102,
"signal/accuracy_reward/centered_abs_mean": 0.097894287109375,
"signal/accuracy_reward/group_bin_occupancy": 0.169921875,
"signal/accuracy_reward/group_std_mean": 0.1274777978658676,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0489471435546875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0489471435546875,
"signal/advantage_abs_mean": 0.0720147468149662,
"signal/advantage_pre_scale_abs_mean": 0.0720147468149662,
"signal/advantage_pre_scale_std": 0.11001690626144409,
"signal/advantage_std": 0.11001690626144409,
"signal/brier_reward/centered_abs_mean": 0.11212355941534043,
"signal/brier_reward/group_bin_occupancy": 0.84609375,
"signal/brier_reward/group_std_mean": 0.14494499266147615,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014015444926917553,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014015444926917553,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0028623202815651894,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.706640625,
"signal/frontier_aurc_reward/group_std_mean": 0.004754068516194821,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.123553055454977e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.123553055454977e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15715896785259248,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8703125,
"signal/frontier_coverage_1/group_std_mean": 0.20266908705234526,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002813145564869046,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002813145564869046,
"signal/frontier_coverage_10/centered_abs_mean": 0.15548037588596345,
"signal/frontier_coverage_10/group_bin_occupancy": 0.869921875,
"signal/frontier_coverage_10/group_std_mean": 0.20054614543914795,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027830985840409995,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027830985840409995,
"signal/frontier_coverage_15/centered_abs_mean": 0.13485134840011598,
"signal/frontier_coverage_15/group_bin_occupancy": 0.865625,
"signal/frontier_coverage_15/group_std_mean": 0.17428669035434724,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002413839101791382,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002413839101791382,
"signal/frontier_coverage_20/centered_abs_mean": 0.0783051684498787,
"signal/frontier_coverage_20/group_bin_occupancy": 0.9046875,
"signal/frontier_coverage_20/group_std_mean": 0.10148594677448272,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014016624772921205,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014016624772921205,
"signal/frontier_coverage_25/centered_abs_mean": 0.0531325563788414,
"signal/frontier_coverage_25/group_bin_occupancy": 0.930859375,
"signal/frontier_coverage_25/group_std_mean": 0.06878565400838851,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009510727250017226,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009510727250017226,
"signal/frontier_coverage_5/centered_abs_mean": 0.15696605443954467,
"signal/frontier_coverage_5/group_bin_occupancy": 0.87109375,
"signal/frontier_coverage_5/group_std_mean": 0.20242418348789215,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002809692220762372,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002809692220762372,
"signal/frontier_ece_reward/centered_abs_mean": 0.004732540622353554,
"signal/frontier_ece_reward/group_bin_occupancy": 0.903125,
"signal/frontier_ece_reward/group_std_mean": 0.006143409106880426,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005915675777941942,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005915675777941942,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28106330037117006,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.722265625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35157610177993776,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03513291254639626,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03513291254639626,
"step": 275
},
{
"calibration/aurc": 0.3373058154661964,
"calibration/batch_distribution_entropy": 0.9826539498416601,
"calibration/batch_entropy_100bins": 0.9682695002978061,
"calibration/batch_entropy_10bins": 0.9826539498416601,
"calibration/batch_entropy_50bins": 0.9762102980123724,
"calibration/batch_uniqueness": 0.953095282009279,
"calibration/buffer_distribution_entropy": 0.999126223691265,
"calibration/buffer_entropy_100bins": 0.9990657505371372,
"calibration/buffer_entropy_10bins": 0.999126223691265,
"calibration/buffer_entropy_50bins": 0.9991405062324873,
"calibration/confidence_entropy": 0.48298531010661866,
"calibration/coverage@0%": 0.016410072162426615,
"calibration/coverage@1%": 0.016410072162426615,
"calibration/coverage@10%": 0.0601661876223092,
"calibration/coverage@15%": 0.08399660591976517,
"calibration/coverage@20%": 0.3101944716242661,
"calibration/coverage@25%": 0.40160989481409004,
"calibration/coverage@30%": 0.4906815680039139,
"calibration/coverage@5%": 0.040628822162426616,
"calibration/ece": 0.14296123700914443,
"calibration/mean_confidence": 0.4870876866229802,
"calibration/prompt_uniqueness": 0.8266549736602498,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 1082.6,
"completions/max_terminated_length": 495.4,
"completions/mean_length": 190.72861328125,
"completions/mean_terminated_length": 190.20317687988282,
"completions/min_length": 97.4,
"completions/min_terminated_length": 97.4,
"epoch": 0.896,
"grad_norm": 0.0011140021961182356,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 946600824.0,
"reward": 0.8575990200042725,
"reward_std": 0.08079204559326172,
"rewards/accuracy_reward": 0.54970703125,
"rewards/brier_reward": 0.7908406734466553,
"rewards/format_reward": 0.999609375,
"rewards/frontier_aurc_reward": -0.0027651153039187195,
"rewards/frontier_coverage_1": 0.10620979815721512,
"rewards/frontier_coverage_10": 0.10566670447587967,
"rewards/frontier_coverage_15": 0.09410437047481537,
"rewards/frontier_coverage_20": 0.06202979385852814,
"rewards/frontier_coverage_25": 0.04811366051435471,
"rewards/frontier_coverage_5": 0.10607990473508835,
"rewards/frontier_ece_reward": 0.0011842235224321484,
"rewards/frontier_entropy_batch_reward": -0.20288202166557312,
"signal/accuracy_reward/centered_abs_mean": 0.081243896484375,
"signal/accuracy_reward/group_bin_occupancy": 0.16640625,
"signal/accuracy_reward/group_std_mean": 0.11124018728733062,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0406219482421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0406219482421875,
"signal/advantage_abs_mean": 0.061866439133882525,
"signal/advantage_pre_scale_abs_mean": 0.061866439133882525,
"signal/advantage_pre_scale_std": 0.0962506964802742,
"signal/advantage_std": 0.0962506964802742,
"signal/brier_reward/centered_abs_mean": 0.11059802174568176,
"signal/brier_reward/group_bin_occupancy": 0.858984375,
"signal/brier_reward/group_std_mean": 0.14146918654441834,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01382475271821022,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.01382475271821022,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_bin_occupancy": 0.1265625,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002222577598877251,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.73046875,
"signal/frontier_aurc_reward/group_std_mean": 0.003623427450656891,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.978413733420894e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.978413733420894e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15609081983566284,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86484375,
"signal/frontier_coverage_1/group_std_mean": 0.20036340057849883,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027940256986767054,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027940256986767054,
"signal/frontier_coverage_10/centered_abs_mean": 0.15488055050373079,
"signal/frontier_coverage_10/group_bin_occupancy": 0.864453125,
"signal/frontier_coverage_10/group_std_mean": 0.19879828989505768,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027723620180040596,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027723620180040596,
"signal/frontier_coverage_15/centered_abs_mean": 0.13422557562589646,
"signal/frontier_coverage_15/group_bin_occupancy": 0.857421875,
"signal/frontier_coverage_15/group_std_mean": 0.17230915725231172,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024026377592235803,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024026377592235803,
"signal/frontier_coverage_20/centered_abs_mean": 0.07715532034635544,
"signal/frontier_coverage_20/group_bin_occupancy": 0.881640625,
"signal/frontier_coverage_20/group_std_mean": 0.09921250641345977,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013810801785439253,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013810801785439253,
"signal/frontier_coverage_25/centered_abs_mean": 0.05220200940966606,
"signal/frontier_coverage_25/group_bin_occupancy": 0.918359375,
"signal/frontier_coverage_25/group_std_mean": 0.06679405272006989,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000934415915980935,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000934415915980935,
"signal/frontier_coverage_5/centered_abs_mean": 0.1558253914117813,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8640625,
"signal/frontier_coverage_5/group_std_mean": 0.20002435743808747,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027892745565623045,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027892745565623045,
"signal/frontier_ece_reward/centered_abs_mean": 0.0045457611791789535,
"signal/frontier_ece_reward/group_bin_occupancy": 0.897265625,
"signal/frontier_ece_reward/group_std_mean": 0.005920033343136311,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005682201473973692,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005682201473973692,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2616787314414978,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.728125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3359409987926483,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03270984143018722,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03270984143018722,
"step": 280
},
{
"calibration/aurc": 0.3549690512751742,
"calibration/batch_distribution_entropy": 0.977302492713015,
"calibration/batch_entropy_100bins": 0.9687005838156748,
"calibration/batch_entropy_10bins": 0.977302492713015,
"calibration/batch_entropy_50bins": 0.9769308698120451,
"calibration/batch_uniqueness": 0.9527252197265625,
"calibration/buffer_distribution_entropy": 0.9990242862387232,
"calibration/buffer_entropy_100bins": 0.9989872340708235,
"calibration/buffer_entropy_10bins": 0.9990242862387232,
"calibration/buffer_entropy_50bins": 0.9990547510383561,
"calibration/confidence_entropy": 0.49627782732928577,
"calibration/coverage@0%": 0.01796875,
"calibration/coverage@1%": 0.01796875,
"calibration/coverage@10%": 0.061328125,
"calibration/coverage@15%": 0.096484375,
"calibration/coverage@20%": 0.25859375,
"calibration/coverage@25%": 0.393359375,
"calibration/coverage@30%": 0.455859375,
"calibration/coverage@5%": 0.0484375,
"calibration/ece": 0.15157571405774412,
"calibration/mean_confidence": 0.49334667608446414,
"calibration/prompt_uniqueness": 0.845263671875,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 865.6,
"completions/max_terminated_length": 455.2,
"completions/mean_length": 191.36181640625,
"completions/mean_terminated_length": 191.0985565185547,
"completions/min_length": 86.0,
"completions/min_terminated_length": 86.0,
"epoch": 0.912,
"grad_norm": 0.0010114161996170878,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 963611665.0,
"reward": 0.8577099800109863,
"reward_std": 0.08399459272623062,
"rewards/accuracy_reward": 0.54443359375,
"rewards/brier_reward": 0.8012910604476928,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0027018039952963592,
"rewards/frontier_coverage_1": 0.1103449311107397,
"rewards/frontier_coverage_10": 0.10959461368620396,
"rewards/frontier_coverage_15": 0.09755977056920528,
"rewards/frontier_coverage_20": 0.06432019025087357,
"rewards/frontier_coverage_25": 0.05184435471892357,
"rewards/frontier_coverage_5": 0.11015897234901786,
"rewards/frontier_ece_reward": 0.001691946922801435,
"rewards/frontier_entropy_batch_reward": -0.19574475586414336,
"signal/accuracy_reward/centered_abs_mean": 0.080206298828125,
"signal/accuracy_reward/group_bin_occupancy": 0.167578125,
"signal/accuracy_reward/group_std_mean": 0.11207558661699295,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0401031494140625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0401031494140625,
"signal/advantage_abs_mean": 0.06401625275611877,
"signal/advantage_pre_scale_abs_mean": 0.06401625275611877,
"signal/advantage_pre_scale_std": 0.10008785128593445,
"signal/advantage_std": 0.10008785128593445,
"signal/brier_reward/centered_abs_mean": 0.11323688179254532,
"signal/brier_reward/group_bin_occupancy": 0.8328125,
"signal/brier_reward/group_std_mean": 0.14842240512371063,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014154610224068165,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.014154610224068165,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023412939393892885,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.730859375,
"signal/frontier_aurc_reward/group_std_mean": 0.003946681786328554,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.190915824437979e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.190915824437979e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15513492822647096,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_1/group_std_mean": 0.20151489973068237,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0027769151609390976,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027769151609390976,
"signal/frontier_coverage_10/centered_abs_mean": 0.15395722687244415,
"signal/frontier_coverage_10/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_10/group_std_mean": 0.19999560713768005,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027558341156691314,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027558341156691314,
"signal/frontier_coverage_15/centered_abs_mean": 0.13296782821416855,
"signal/frontier_coverage_15/group_bin_occupancy": 0.853515625,
"signal/frontier_coverage_15/group_std_mean": 0.17283936440944672,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023801239673048257,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023801239673048257,
"signal/frontier_coverage_20/centered_abs_mean": 0.0778110533952713,
"signal/frontier_coverage_20/group_bin_occupancy": 0.889453125,
"signal/frontier_coverage_20/group_std_mean": 0.10108065158128739,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013928177999332548,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013928177999332548,
"signal/frontier_coverage_25/centered_abs_mean": 0.0549514427781105,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9234375,
"signal/frontier_coverage_25/group_std_mean": 0.07073460221290588,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000983630819246173,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000983630819246173,
"signal/frontier_coverage_5/centered_abs_mean": 0.15491481125354767,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86328125,
"signal/frontier_coverage_5/group_std_mean": 0.20123314261436462,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0027729750145226717,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027729750145226717,
"signal/frontier_ece_reward/centered_abs_mean": 0.004723855573683977,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8921875,
"signal/frontier_ece_reward/group_std_mean": 0.006184379477053881,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005904819467104971,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005904819467104971,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2582594394683838,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.738671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33268279433250425,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.032282429933547976,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032282429933547976,
"step": 285
},
{
"calibration/aurc": 0.41280512387379353,
"calibration/batch_distribution_entropy": 0.983191233962286,
"calibration/batch_entropy_100bins": 0.9707893275502993,
"calibration/batch_entropy_10bins": 0.983191233962286,
"calibration/batch_entropy_50bins": 0.9796747150760041,
"calibration/batch_uniqueness": 0.9539581298828125,
"calibration/buffer_distribution_entropy": 0.9990114243714435,
"calibration/buffer_entropy_100bins": 0.9990231310977965,
"calibration/buffer_entropy_10bins": 0.9990114243714435,
"calibration/buffer_entropy_50bins": 0.9990782041438109,
"calibration/confidence_entropy": 0.5056221875295627,
"calibration/coverage@0%": 0.005859375,
"calibration/coverage@1%": 0.005859375,
"calibration/coverage@10%": 0.0171875,
"calibration/coverage@15%": 0.019140625,
"calibration/coverage@20%": 0.033203125,
"calibration/coverage@25%": 0.10234375,
"calibration/coverage@30%": 0.267578125,
"calibration/coverage@5%": 0.01171875,
"calibration/ece": 0.13720565678424684,
"calibration/mean_confidence": 0.5138671169361444,
"calibration/prompt_uniqueness": 0.854052734375,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 698.8,
"completions/max_terminated_length": 482.6,
"completions/mean_length": 188.668359375,
"completions/mean_terminated_length": 188.5367401123047,
"completions/min_length": 93.4,
"completions/min_terminated_length": 93.4,
"epoch": 0.928,
"grad_norm": 0.0009088640799745917,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 980570445.0,
"reward": 0.8483018755912781,
"reward_std": 0.08071554303169251,
"rewards/accuracy_reward": 0.53251953125,
"rewards/brier_reward": 0.7901157855987548,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.003404234582558274,
"rewards/frontier_coverage_1": 0.11180114150047302,
"rewards/frontier_coverage_10": 0.11092503815889358,
"rewards/frontier_coverage_15": 0.09873643815517426,
"rewards/frontier_coverage_20": 0.0643385447561741,
"rewards/frontier_coverage_25": 0.052167801558971404,
"rewards/frontier_coverage_5": 0.1116182416677475,
"rewards/frontier_ece_reward": 0.0019177033798769116,
"rewards/frontier_entropy_batch_reward": -0.21312889754772185,
"signal/accuracy_reward/centered_abs_mean": 0.072637939453125,
"signal/accuracy_reward/group_bin_occupancy": 0.163671875,
"signal/accuracy_reward/group_std_mean": 0.10080017894506454,
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0363189697265625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0363189697265625,
"signal/advantage_abs_mean": 0.06181541979312897,
"signal/advantage_pre_scale_abs_mean": 0.06181541979312897,
"signal/advantage_pre_scale_std": 0.09706850945949555,
"signal/advantage_std": 0.09706850945949555,
"signal/brier_reward/centered_abs_mean": 0.111118184030056,
"signal/brier_reward/group_bin_occupancy": 0.83671875,
"signal/brier_reward/group_std_mean": 0.14327452182769776,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013889773003757,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013889773003757,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0029117835219949484,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.727734375,
"signal/frontier_aurc_reward/group_std_mean": 0.004748767055571079,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 5.2120923646725716e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 5.2120923646725716e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14507719576358796,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_1/group_std_mean": 0.1872227430343628,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002596881752833724,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002596881752833724,
"signal/frontier_coverage_10/centered_abs_mean": 0.1439109742641449,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8546875,
"signal/frontier_coverage_10/group_std_mean": 0.18574815690517427,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002576006343588233,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002576006343588233,
"signal/frontier_coverage_15/centered_abs_mean": 0.1262580692768097,
"signal/frontier_coverage_15/group_bin_occupancy": 0.846484375,
"signal/frontier_coverage_15/group_std_mean": 0.16331798434257508,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022600193507969378,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022600193507969378,
"signal/frontier_coverage_20/centered_abs_mean": 0.0749655857682228,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_20/group_std_mean": 0.09689257442951202,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013418839545920492,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013418839545920492,
"signal/frontier_coverage_25/centered_abs_mean": 0.05532756522297859,
"signal/frontier_coverage_25/group_bin_occupancy": 0.93046875,
"signal/frontier_coverage_25/group_std_mean": 0.07019431442022324,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009903633617796004,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009903633617796004,
"signal/frontier_coverage_5/centered_abs_mean": 0.14484555274248123,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_5/group_std_mean": 0.18693141639232635,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002592735271900892,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002592735271900892,
"signal/frontier_ece_reward/centered_abs_mean": 0.004770417790859937,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89296875,
"signal/frontier_ece_reward/group_std_mean": 0.0062720650807023045,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0005963022238574922,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0005963022238574922,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27077251076698305,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34694740176200867,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03384656384587288,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03384656384587288,
"step": 290
},
{
"calibration/aurc": 0.24858498275267107,
"calibration/batch_distribution_entropy": 0.9861492452770714,
"calibration/batch_entropy_100bins": 0.9737597827623423,
"calibration/batch_entropy_10bins": 0.9861492452770714,
"calibration/batch_entropy_50bins": 0.9812966268493726,
"calibration/batch_uniqueness": 0.954254150390625,
"calibration/buffer_distribution_entropy": 0.9988957247957387,
"calibration/buffer_entropy_100bins": 0.9989956482917522,
"calibration/buffer_entropy_10bins": 0.9988957247957387,
"calibration/buffer_entropy_50bins": 0.9990108307728066,
"calibration/confidence_entropy": 0.4974394635763413,
"calibration/coverage@0%": 0.025,
"calibration/coverage@1%": 0.025,
"calibration/coverage@10%": 0.169921875,
"calibration/coverage@15%": 0.289453125,
"calibration/coverage@20%": 0.42890625,
"calibration/coverage@25%": 0.5796875,
"calibration/coverage@30%": 0.6609375,
"calibration/coverage@5%": 0.09921875,
"calibration/ece": 0.10266510009719523,
"calibration/mean_confidence": 0.472734394472595,
"calibration/prompt_uniqueness": 0.839453125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 446.6,
"completions/max_terminated_length": 446.6,
"completions/mean_length": 187.97138671875,
"completions/mean_terminated_length": 187.97138671875,
"completions/min_length": 97.2,
"completions/min_terminated_length": 97.2,
"epoch": 0.944,
"grad_norm": 0.0011019782396033406,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 997470696.0,
"reward": 0.8485186576843262,
"reward_std": 0.09234340786933899,
"rewards/accuracy_reward": 0.53505859375,
"rewards/brier_reward": 0.7864872336387634,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0030688193626701834,
"rewards/frontier_coverage_1": 0.10997713655233383,
"rewards/frontier_coverage_10": 0.10910578817129135,
"rewards/frontier_coverage_15": 0.09824755191802978,
"rewards/frontier_coverage_20": 0.06621812656521797,
"rewards/frontier_coverage_25": 0.04666025787591934,
"rewards/frontier_coverage_5": 0.10978993475437164,
"rewards/frontier_ece_reward": 0.0015931544359773398,
"rewards/frontier_entropy_batch_reward": -0.2170539140701294,
"signal/accuracy_reward/centered_abs_mean": 0.112640380859375,
"signal/accuracy_reward/group_bin_occupancy": 0.176171875,
"signal/accuracy_reward/group_std_mean": 0.14598776698112487,
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0563201904296875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0563201904296875,
"signal/advantage_abs_mean": 0.07284359484910966,
"signal/advantage_pre_scale_abs_mean": 0.07284359484910966,
"signal/advantage_pre_scale_std": 0.11034233421087265,
"signal/advantage_std": 0.11034233421087265,
"signal/brier_reward/centered_abs_mean": 0.11176075041294098,
"signal/brier_reward/group_bin_occupancy": 0.832421875,
"signal/brier_reward/group_std_mean": 0.14492084681987763,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013970093801617622,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013970093801617622,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0024296872783452273,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.716796875,
"signal/frontier_aurc_reward/group_std_mean": 0.004007898364216089,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.349140144768171e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.349140144768171e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.1726018726825714,
"signal/frontier_coverage_1/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_1/group_std_mean": 0.22027516961097718,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003089573513716459,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003089573513716459,
"signal/frontier_coverage_10/centered_abs_mean": 0.17125667929649352,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8515625,
"signal/frontier_coverage_10/group_std_mean": 0.21858170330524446,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0030654944013804196,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030654944013804196,
"signal/frontier_coverage_15/centered_abs_mean": 0.1538640648126602,
"signal/frontier_coverage_15/group_bin_occupancy": 0.846484375,
"signal/frontier_coverage_15/group_std_mean": 0.19664142429828643,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0027541667222976685,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027541667222976685,
"signal/frontier_coverage_20/centered_abs_mean": 0.08876172602176666,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8796875,
"signal/frontier_coverage_20/group_std_mean": 0.11390969753265381,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015888348687440157,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015888348687440157,
"signal/frontier_coverage_25/centered_abs_mean": 0.054720057547092436,
"signal/frontier_coverage_25/group_bin_occupancy": 0.919140625,
"signal/frontier_coverage_25/group_std_mean": 0.07047712504863739,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009794890065677464,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009794890065677464,
"signal/frontier_coverage_5/centered_abs_mean": 0.1723033905029297,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85234375,
"signal/frontier_coverage_5/group_std_mean": 0.21990018784999849,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0030842306092381476,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030842306092381476,
"signal/frontier_ece_reward/centered_abs_mean": 0.005012043006718159,
"signal/frontier_ece_reward/group_bin_occupancy": 0.90234375,
"signal/frontier_ece_reward/group_std_mean": 0.006545277405530214,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006265053758397699,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006265053758397699,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27133584320545195,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.726953125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34131971597671507,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.033916980400681494,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033916980400681494,
"step": 295
},
{
"calibration/aurc": 0.3256529793283661,
"calibration/batch_distribution_entropy": 0.984952501452715,
"calibration/batch_entropy_100bins": 0.9718621359045295,
"calibration/batch_entropy_10bins": 0.984952501452715,
"calibration/batch_entropy_50bins": 0.9805238677858856,
"calibration/batch_uniqueness": 0.954693603515625,
"calibration/buffer_distribution_entropy": 0.9989409552203344,
"calibration/buffer_entropy_100bins": 0.9990398793740454,
"calibration/buffer_entropy_10bins": 0.9989409552203344,
"calibration/buffer_entropy_50bins": 0.9990565379611495,
"calibration/confidence_entropy": 0.4897216704300299,
"calibration/coverage@0%": 0.01171875,
"calibration/coverage@1%": 0.01171875,
"calibration/coverage@10%": 0.104296875,
"calibration/coverage@15%": 0.2359375,
"calibration/coverage@20%": 0.308984375,
"calibration/coverage@25%": 0.3640625,
"calibration/coverage@30%": 0.5078125,
"calibration/coverage@5%": 0.012109375,
"calibration/ece": 0.13947520019378662,
"calibration/mean_confidence": 0.5198152612431157,
"calibration/prompt_uniqueness": 0.84501953125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 652.8,
"completions/max_terminated_length": 458.2,
"completions/mean_length": 188.3568359375,
"completions/mean_terminated_length": 188.22555847167968,
"completions/min_length": 101.8,
"completions/min_terminated_length": 101.8,
"epoch": 0.96,
"grad_norm": 0.0008233313565142453,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 1014339790.0,
"reward": 0.8462372064590454,
"reward_std": 0.07700852155685425,
"rewards/accuracy_reward": 0.5169921875,
"rewards/brier_reward": 0.8101608753204346,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_aurc_reward": -0.0029094903729856014,
"rewards/frontier_coverage_1": 0.13561428487300872,
"rewards/frontier_coverage_10": 0.13452683985233307,
"rewards/frontier_coverage_15": 0.1212164431810379,
"rewards/frontier_coverage_20": 0.07751094549894333,
"rewards/frontier_coverage_25": 0.05298488959670067,
"rewards/frontier_coverage_5": 0.135471972823143,
"rewards/frontier_ece_reward": 0.0022863436490297316,
"rewards/frontier_entropy_batch_reward": -0.20344921350479125,
"signal/accuracy_reward/centered_abs_mean": 0.0708251953125,
"signal/accuracy_reward/group_bin_occupancy": 0.162890625,
"signal/accuracy_reward/group_std_mean": 0.0995680645108223,
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03541259765625,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03541259765625,
"signal/advantage_abs_mean": 0.05835134610533714,
"signal/advantage_pre_scale_abs_mean": 0.05835134610533714,
"signal/advantage_pre_scale_std": 0.09123541563749313,
"signal/advantage_std": 0.09123541563749313,
"signal/brier_reward/centered_abs_mean": 0.1000775396823883,
"signal/brier_reward/group_bin_occupancy": 0.8765625,
"signal/brier_reward/group_std_mean": 0.12929840236902237,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012509692460298538,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012509692460298538,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_bin_occupancy": 0.12578125,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0023374527459964155,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.745703125,
"signal/frontier_aurc_reward/group_std_mean": 0.003886171476915479,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.184040299151093e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.184040299151093e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14079618453979492,
"signal/frontier_coverage_1/group_bin_occupancy": 0.882421875,
"signal/frontier_coverage_1/group_std_mean": 0.18256475627422333,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002520251739770174,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002520251739770174,
"signal/frontier_coverage_10/centered_abs_mean": 0.13965638279914855,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8828125,
"signal/frontier_coverage_10/group_std_mean": 0.181106236577034,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024998491164296864,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024998491164296864,
"signal/frontier_coverage_15/centered_abs_mean": 0.12565270364284514,
"signal/frontier_coverage_15/group_bin_occupancy": 0.87421875,
"signal/frontier_coverage_15/group_std_mean": 0.16323770582675934,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0022491833195090296,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022491833195090296,
"signal/frontier_coverage_20/centered_abs_mean": 0.0731646478176117,
"signal/frontier_coverage_20/group_bin_occupancy": 0.895703125,
"signal/frontier_coverage_20/group_std_mean": 0.0953644946217537,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013096471317112445,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013096471317112445,
"signal/frontier_coverage_25/centered_abs_mean": 0.05031884089112282,
"signal/frontier_coverage_25/group_bin_occupancy": 0.93359375,
"signal/frontier_coverage_25/group_std_mean": 0.06426827237010002,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009007072076201439,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009007072076201439,
"signal/frontier_coverage_5/centered_abs_mean": 0.14063106179237367,
"signal/frontier_coverage_5/group_bin_occupancy": 0.8828125,
"signal/frontier_coverage_5/group_std_mean": 0.18235519230365754,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025172959081828593,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025172959081828593,
"signal/frontier_ece_reward/centered_abs_mean": 0.0048952271230518814,
"signal/frontier_ece_reward/group_bin_occupancy": 0.89609375,
"signal/frontier_ece_reward/group_std_mean": 0.006365635897964239,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006119033903814852,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006119033903814852,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26704747676849366,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73671875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33781918287277224,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03338093459606171,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03338093459606171,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.4575152377276934,
"eval_calibration/batch_distribution_entropy": 0.9083536036370418,
"eval_calibration/batch_entropy_100bins": 0.7084673039767129,
"eval_calibration/batch_entropy_10bins": 0.9083536036370418,
"eval_calibration/batch_entropy_50bins": 0.7941923347176845,
"eval_calibration/batch_uniqueness": 0.8974609375,
"eval_calibration/buffer_distribution_entropy": 0.9987965314883902,
"eval_calibration/buffer_entropy_100bins": 0.9989874140319082,
"eval_calibration/buffer_entropy_10bins": 0.9987965314883902,
"eval_calibration/buffer_entropy_50bins": 0.999000163227866,
"eval_calibration/confidence_entropy": 0.4877032802303043,
"eval_calibration/coverage@0%": 0.0859375,
"eval_calibration/coverage@1%": 0.0859375,
"eval_calibration/coverage@10%": 0.0859375,
"eval_calibration/coverage@15%": 0.09375,
"eval_calibration/coverage@20%": 0.1015625,
"eval_calibration/coverage@25%": 0.1484375,
"eval_calibration/coverage@30%": 0.15625,
"eval_calibration/coverage@5%": 0.0859375,
"eval_calibration/ece": 0.20039907035790783,
"eval_calibration/mean_confidence": 0.4575770178494413,
"eval_calibration/prompt_uniqueness": 0.8974609375,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 295.25,
"eval_completions/max_terminated_length": 295.25,
"eval_completions/mean_length": 189.70380020141602,
"eval_completions/mean_terminated_length": 189.70380020141602,
"eval_completions/min_length": 116.75,
"eval_completions/min_terminated_length": 116.75,
"eval_loss": 0.0,
"eval_num_tokens": 1014339790.0,
"eval_reward": 0.7071669399738312,
"eval_reward_std": 0.226898942142725,
"eval_rewards/accuracy_reward": 0.427734375,
"eval_rewards/brier_reward": 0.807328924536705,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_aurc_reward": -0.003334582201205194,
"eval_rewards/frontier_coverage_1": 0.20210690423846245,
"eval_rewards/frontier_coverage_10": 0.2005590945482254,
"eval_rewards/frontier_coverage_15": 0.18310636281967163,
"eval_rewards/frontier_coverage_20": 0.10963826067745686,
"eval_rewards/frontier_coverage_25": 0.05915482249110937,
"eval_rewards/frontier_coverage_5": 0.20197707042098045,
"eval_rewards/frontier_ece_reward": 0.002569766016677022,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 17.0706,
"eval_samples_per_second": 29.29,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4730224609375,
"eval_signal/accuracy_reward/group_bin_occupancy": 0.25,
"eval_signal/accuracy_reward/group_std_mean": 0.4935857355594635,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23651123046875,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23651123046875,
"eval_signal/advantage_abs_mean": 0.21183785423636436,
"eval_signal/advantage_pre_scale_abs_mean": 0.21183785423636436,
"eval_signal/advantage_pre_scale_std": 0.22447463124990463,
"eval_signal/advantage_std": 0.22447463124990463,
"eval_signal/brier_reward/centered_abs_mean": 0.1758808195590973,
"eval_signal/brier_reward/group_bin_occupancy": 0.9296875,
"eval_signal/brier_reward/group_std_mean": 0.22358601912856102,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02198510244488716,
"eval_signal/brier_reward/weight": 0.125,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.02198510244488716,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_bin_occupancy": 0.125,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_aurc_reward/centered_abs_mean": 0.004215276916511357,
"eval_signal/frontier_aurc_reward/group_bin_occupancy": 0.640625,
"eval_signal/frontier_aurc_reward/group_std_mean": 0.008374640950933099,
"eval_signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 7.545345397375058e-05,
"eval_signal/frontier_aurc_reward/weight": 0.017899999395012856,
"eval_signal/frontier_aurc_reward/weighted_centered_abs_mean": 7.545345397375058e-05,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3614576756954193,
"eval_signal/frontier_coverage_1/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_1/group_std_mean": 0.430373378098011,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00647009233944118,
"eval_signal/frontier_coverage_1/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00647009233944118,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.35852116346359253,
"eval_signal/frontier_coverage_10/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_10/group_std_mean": 0.42694830149412155,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0064175286097452044,
"eval_signal/frontier_coverage_10/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0064175286097452044,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.32429099828004837,
"eval_signal/frontier_coverage_15/group_bin_occupancy": 0.9453125,
"eval_signal/frontier_coverage_15/group_std_mean": 0.387409083545208,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005804808693937957,
"eval_signal/frontier_coverage_15/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005804808693937957,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.1720643900334835,
"eval_signal/frontier_coverage_20/group_bin_occupancy": 0.8984375,
"eval_signal/frontier_coverage_20/group_std_mean": 0.21237896382808685,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030799524392932653,
"eval_signal/frontier_coverage_20/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030799524392932653,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.08441895246505737,
"eval_signal/frontier_coverage_25/group_bin_occupancy": 0.9453125,
"eval_signal/frontier_coverage_25/group_std_mean": 0.1048442255705595,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015110991662368178,
"eval_signal/frontier_coverage_25/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015110991662368178,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3611843213438988,
"eval_signal/frontier_coverage_5/group_bin_occupancy": 0.953125,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4300566017627716,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.006465199403464794,
"eval_signal/frontier_coverage_5/weight": 0.017899999395012856,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.006465199403464794,
"eval_signal/frontier_ece_reward/centered_abs_mean": 0.006295109633356333,
"eval_signal/frontier_ece_reward/group_bin_occupancy": 0.96875,
"eval_signal/frontier_ece_reward/group_std_mean": 0.008172678295522928,
"eval_signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"eval_signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0007868887041695416,
"eval_signal/frontier_ece_reward/weight": 0.125,
"eval_signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0007868887041695416,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.125,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.125,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.234,
"step": 300
},
{
"calibration/aurc": 0.26956602079129766,
"calibration/batch_distribution_entropy": 0.975847157951916,
"calibration/batch_entropy_100bins": 0.9649908946480957,
"calibration/batch_entropy_10bins": 0.975847157951916,
"calibration/batch_entropy_50bins": 0.9739094070267618,
"calibration/batch_uniqueness": 0.95203857421875,
"calibration/buffer_distribution_entropy": 0.9987995026989941,
"calibration/buffer_entropy_100bins": 0.9989820996482228,
"calibration/buffer_entropy_10bins": 0.9987995026989941,
"calibration/buffer_entropy_50bins": 0.9989938509658627,
"calibration/confidence_entropy": 0.5116255959622197,
"calibration/coverage@0%": 0.014453125,
"calibration/coverage@1%": 0.014453125,
"calibration/coverage@10%": 0.26015625,
"calibration/coverage@15%": 0.36953125,
"calibration/coverage@20%": 0.46171875,
"calibration/coverage@25%": 0.51875,
"calibration/coverage@30%": 0.59140625,
"calibration/coverage@5%": 0.1109375,
"calibration/ece": 0.13497489066439078,
"calibration/mean_confidence": 0.49586762510521404,
"calibration/prompt_uniqueness": 0.840478515625,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 517.2,
"completions/max_terminated_length": 517.2,
"completions/mean_length": 192.8546875,
"completions/mean_terminated_length": 192.8546875,
"completions/min_length": 104.6,
"completions/min_terminated_length": 104.6,
"epoch": 0.976,
"grad_norm": 0.00106345908716321,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 1031175742.0,
"reward": 0.8589818477630615,
"reward_std": 0.08459014743566513,
"rewards/accuracy_reward": 0.554296875,
"rewards/brier_reward": 0.7969029545783997,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.0025948323775082825,
"rewards/frontier_coverage_1": 0.10237730550579727,
"rewards/frontier_coverage_10": 0.10188237186521291,
"rewards/frontier_coverage_15": 0.09465207355096936,
"rewards/frontier_coverage_20": 0.066153160110116,
"rewards/frontier_coverage_25": 0.04752057008445263,
"rewards/frontier_coverage_5": 0.102328123152256,
"rewards/frontier_ece_reward": 0.001362017064820975,
"rewards/frontier_entropy_batch_reward": -0.2165709674358368,
"signal/accuracy_reward/centered_abs_mean": 0.0876708984375,
"signal/accuracy_reward/group_bin_occupancy": 0.1703125,
"signal/accuracy_reward/group_std_mean": 0.12027212083339692,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04383544921875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04383544921875,
"signal/advantage_abs_mean": 0.06527443826198578,
"signal/advantage_pre_scale_abs_mean": 0.06527443826198578,
"signal/advantage_pre_scale_std": 0.09959482550621032,
"signal/advantage_std": 0.09959482550621032,
"signal/brier_reward/centered_abs_mean": 0.10134001821279526,
"signal/brier_reward/group_bin_occupancy": 0.856640625,
"signal/brier_reward/group_std_mean": 0.13163287788629532,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012667502276599407,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012667502276599407,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.002029223274439573,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.730078125,
"signal/frontier_aurc_reward/group_std_mean": 0.0033127402421087027,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.632309453678317e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.632309453678317e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15118311196565629,
"signal/frontier_coverage_1/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_1/group_std_mean": 0.1950996220111847,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002706177672371268,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002706177672371268,
"signal/frontier_coverage_10/centered_abs_mean": 0.15004239976406097,
"signal/frontier_coverage_10/group_bin_occupancy": 0.865234375,
"signal/frontier_coverage_10/group_std_mean": 0.1936278909444809,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002685758890584111,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002685758890584111,
"signal/frontier_coverage_15/centered_abs_mean": 0.13605864495038986,
"signal/frontier_coverage_15/group_bin_occupancy": 0.863671875,
"signal/frontier_coverage_15/group_std_mean": 0.1756308764219284,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002435449743643403,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002435449743643403,
"signal/frontier_coverage_20/centered_abs_mean": 0.0769604966044426,
"signal/frontier_coverage_20/group_bin_occupancy": 0.890234375,
"signal/frontier_coverage_20/group_std_mean": 0.09997625052928924,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013775928178802132,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013775928178802132,
"signal/frontier_coverage_25/centered_abs_mean": 0.04939193576574326,
"signal/frontier_coverage_25/group_bin_occupancy": 0.9171875,
"signal/frontier_coverage_25/group_std_mean": 0.06393922716379166,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008841155911795795,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008841155911795795,
"signal/frontier_coverage_5/centered_abs_mean": 0.1510834127664566,
"signal/frontier_coverage_5/group_bin_occupancy": 0.86640625,
"signal/frontier_coverage_5/group_std_mean": 0.19497024416923522,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002704393118619919,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002704393118619919,
"signal/frontier_ece_reward/centered_abs_mean": 0.004813673906028271,
"signal/frontier_ece_reward/group_bin_occupancy": 0.8953125,
"signal/frontier_ece_reward/group_std_mean": 0.006306731514632702,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006017092382535338,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006017092382535338,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2821305632591248,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.73046875,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3564057588577271,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0352663204073906,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0352663204073906,
"step": 305
},
{
"calibration/aurc": 0.34615962203217165,
"calibration/batch_distribution_entropy": 0.9822478080256127,
"calibration/batch_entropy_100bins": 0.9710197463283367,
"calibration/batch_entropy_10bins": 0.9822478080256127,
"calibration/batch_entropy_50bins": 0.9798762014394601,
"calibration/batch_uniqueness": 0.9536600873594491,
"calibration/buffer_distribution_entropy": 0.998897547628759,
"calibration/buffer_entropy_100bins": 0.999052160716665,
"calibration/buffer_entropy_10bins": 0.998897547628759,
"calibration/buffer_entropy_50bins": 0.9990733510872982,
"calibration/confidence_entropy": 0.4787588483252154,
"calibration/coverage@0%": 0.01914826932485323,
"calibration/coverage@1%": 0.01914826932485323,
"calibration/coverage@10%": 0.06448752446183953,
"calibration/coverage@15%": 0.11417563600782779,
"calibration/coverage@20%": 0.1626505931996086,
"calibration/coverage@25%": 0.4001697040117417,
"calibration/coverage@30%": 0.483008959148728,
"calibration/coverage@5%": 0.03282014432485323,
"calibration/ece": 0.11840179079530369,
"calibration/mean_confidence": 0.4507503788278635,
"calibration/prompt_uniqueness": 0.8376709746520552,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 606.0,
"completions/max_terminated_length": 390.0,
"completions/mean_length": 189.25205078125,
"completions/mean_terminated_length": 189.1205261230469,
"completions/min_length": 102.8,
"completions/min_terminated_length": 102.8,
"epoch": 0.992,
"grad_norm": 0.0010464171646162868,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 1048242163.0,
"reward": 0.8496513366699219,
"reward_std": 0.08171502947807312,
"rewards/accuracy_reward": 0.53125,
"rewards/brier_reward": 0.8029752850532532,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_aurc_reward": -0.002545328298583627,
"rewards/frontier_coverage_1": 0.13065127432346343,
"rewards/frontier_coverage_10": 0.12966825366020202,
"rewards/frontier_coverage_15": 0.11746386885643005,
"rewards/frontier_coverage_20": 0.07562436014413834,
"rewards/frontier_coverage_25": 0.057191865891218184,
"rewards/frontier_coverage_5": 0.13061045855283737,
"rewards/frontier_ece_reward": 0.0018452441552653908,
"rewards/frontier_entropy_batch_reward": -0.2236760824918747,
"signal/accuracy_reward/centered_abs_mean": 0.083056640625,
"signal/accuracy_reward/group_bin_occupancy": 0.163671875,
"signal/accuracy_reward/group_std_mean": 0.10889570638537407,
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0415283203125,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0415283203125,
"signal/advantage_abs_mean": 0.06335543915629387,
"signal/advantage_pre_scale_abs_mean": 0.06335543915629387,
"signal/advantage_pre_scale_std": 0.0979221597313881,
"signal/advantage_std": 0.0979221597313881,
"signal/brier_reward/centered_abs_mean": 0.10264453142881394,
"signal/brier_reward/group_bin_occupancy": 0.843359375,
"signal/brier_reward/group_std_mean": 0.131204953789711,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012830566428601742,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.012830566428601742,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_bin_occupancy": 0.125390625,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 9.46044921875e-05,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0020562576595693828,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.74609375,
"signal/frontier_aurc_reward/group_std_mean": 0.0034046342596411707,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 3.680701047414914e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 3.680701047414914e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.15361351668834686,
"signal/frontier_coverage_1/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_1/group_std_mean": 0.1951121598482132,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00274968184530735,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00274968184530735,
"signal/frontier_coverage_10/centered_abs_mean": 0.1525499314069748,
"signal/frontier_coverage_10/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_10/group_std_mean": 0.1937567949295044,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0027306437492370605,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027306437492370605,
"signal/frontier_coverage_15/centered_abs_mean": 0.13804518431425095,
"signal/frontier_coverage_15/group_bin_occupancy": 0.862109375,
"signal/frontier_coverage_15/group_std_mean": 0.1754040390253067,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0024710086872801185,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024710086872801185,
"signal/frontier_coverage_20/centered_abs_mean": 0.07695982903242111,
"signal/frontier_coverage_20/group_bin_occupancy": 0.89375,
"signal/frontier_coverage_20/group_std_mean": 0.09852775484323502,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001377580827102065,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001377580827102065,
"signal/frontier_coverage_25/centered_abs_mean": 0.05279005914926529,
"signal/frontier_coverage_25/group_bin_occupancy": 0.92890625,
"signal/frontier_coverage_25/group_std_mean": 0.06735634654760361,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009449420729652047,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009449420729652047,
"signal/frontier_coverage_5/centered_abs_mean": 0.15354090929031372,
"signal/frontier_coverage_5/group_bin_occupancy": 0.866796875,
"signal/frontier_coverage_5/group_std_mean": 0.19501928389072418,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002748382231220603,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002748382231220603,
"signal/frontier_ece_reward/centered_abs_mean": 0.004886813275516033,
"signal/frontier_ece_reward/group_bin_occupancy": 0.918359375,
"signal/frontier_ece_reward/group_std_mean": 0.0063082781620323655,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006108516594395041,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006108516594395041,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27488839626312256,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.717578125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35261892080307006,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03436104953289032,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03436104953289032,
"step": 310
},
{
"calibration/aurc": 0.2688797599130984,
"calibration/batch_distribution_entropy": 0.9675266719184563,
"calibration/batch_entropy_100bins": 0.9649150285306254,
"calibration/batch_entropy_10bins": 0.9675266719184563,
"calibration/batch_entropy_50bins": 0.9690713029080542,
"calibration/batch_uniqueness": 0.951904296875,
"calibration/buffer_distribution_entropy": 0.9989472583325865,
"calibration/buffer_entropy_100bins": 0.9990723220281166,
"calibration/buffer_entropy_10bins": 0.9989472583325865,
"calibration/buffer_entropy_50bins": 0.9991024787924342,
"calibration/confidence_entropy": 0.46889996447870286,
"calibration/coverage@0%": 0.01953125,
"calibration/coverage@1%": 0.01953125,
"calibration/coverage@10%": 0.03515625,
"calibration/coverage@15%": 0.130859375,
"calibration/coverage@20%": 0.2314453125,
"calibration/coverage@25%": 0.4111328125,
"calibration/coverage@30%": 0.73046875,
"calibration/coverage@5%": 0.02734375,
"calibration/ece": 0.14754415918657318,
"calibration/mean_confidence": 0.581622264145669,
"calibration/prompt_uniqueness": 0.8184814453125,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 380.5,
"completions/max_terminated_length": 380.5,
"completions/mean_length": 186.61524963378906,
"completions/mean_terminated_length": 186.61524963378906,
"completions/min_length": 106.5,
"completions/min_terminated_length": 106.5,
"epoch": 0.9984,
"num_tokens": 1055012517.0,
"reward": 0.8589463829994202,
"reward_std": 0.08560431376099586,
"rewards/accuracy_reward": 0.558837890625,
"rewards/brier_reward": 0.7794443070888519,
"rewards/format_reward": 1.0,
"rewards/frontier_aurc_reward": -0.0029379306361079216,
"rewards/frontier_coverage_1": 0.07742930576205254,
"rewards/frontier_coverage_10": 0.0766817256808281,
"rewards/frontier_coverage_15": 0.06769302859902382,
"rewards/frontier_coverage_20": 0.04756389185786247,
"rewards/frontier_coverage_25": 0.04449248127639294,
"rewards/frontier_coverage_5": 0.07734929025173187,
"rewards/frontier_ece_reward": 0.0017147985054180026,
"rewards/frontier_entropy_batch_reward": -0.20054014027118683,
"signal/accuracy_reward/centered_abs_mean": 0.0843353271484375,
"signal/accuracy_reward/group_bin_occupancy": 0.1689453125,
"signal/accuracy_reward/group_std_mean": 0.117337416857481,
"signal/accuracy_reward/group_zero_std_frac": 0.6484375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04216766357421875,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04216766357421875,
"signal/advantage_abs_mean": 0.06579190492630005,
"signal/advantage_pre_scale_abs_mean": 0.06579190492630005,
"signal/advantage_pre_scale_std": 0.10127944126725197,
"signal/advantage_std": 0.10127944126725197,
"signal/brier_reward/centered_abs_mean": 0.11069391667842865,
"signal/brier_reward/group_bin_occupancy": 0.86328125,
"signal/brier_reward/group_std_mean": 0.13984516263008118,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013836739584803581,
"signal/brier_reward/weight": 0.125,
"signal/brier_reward/weighted_centered_abs_mean": 0.013836739584803581,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_bin_occupancy": 0.125,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_aurc_reward/centered_abs_mean": 0.0025345467729493976,
"signal/frontier_aurc_reward/group_bin_occupancy": 0.7607421875,
"signal/frontier_aurc_reward/group_std_mean": 0.004020490450784564,
"signal/frontier_aurc_reward/group_zero_std_frac": 0.0,
"signal/frontier_aurc_reward/scaled_weighted_centered_abs_mean": 4.536838969215751e-05,
"signal/frontier_aurc_reward/weight": 0.017899999395012856,
"signal/frontier_aurc_reward/weighted_centered_abs_mean": 4.536838969215751e-05,
"signal/frontier_coverage_1/centered_abs_mean": 0.14754138886928558,
"signal/frontier_coverage_1/group_bin_occupancy": 0.8544921875,
"signal/frontier_coverage_1/group_std_mean": 0.1890631541609764,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026409910060465336,
"signal/frontier_coverage_1/weight": 0.017899999395012856,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026409910060465336,
"signal/frontier_coverage_10/centered_abs_mean": 0.14654190093278885,
"signal/frontier_coverage_10/group_bin_occupancy": 0.8544921875,
"signal/frontier_coverage_10/group_std_mean": 0.1877775639295578,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002623099833726883,
"signal/frontier_coverage_10/weight": 0.017899999395012856,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002623099833726883,
"signal/frontier_coverage_15/centered_abs_mean": 0.13195443153381348,
"signal/frontier_coverage_15/group_bin_occupancy": 0.8564453125,
"signal/frontier_coverage_15/group_std_mean": 0.16884687542915344,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023619841085746884,
"signal/frontier_coverage_15/weight": 0.017899999395012856,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023619841085746884,
"signal/frontier_coverage_20/centered_abs_mean": 0.07070561498403549,
"signal/frontier_coverage_20/group_bin_occupancy": 0.8916015625,
"signal/frontier_coverage_20/group_std_mean": 0.09012233838438988,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001265630533453077,
"signal/frontier_coverage_20/weight": 0.017899999395012856,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001265630533453077,
"signal/frontier_coverage_25/centered_abs_mean": 0.04924464598298073,
"signal/frontier_coverage_25/group_bin_occupancy": 0.92578125,
"signal/frontier_coverage_25/group_std_mean": 0.06359815411269665,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008814791508484632,
"signal/frontier_coverage_25/weight": 0.017899999395012856,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008814791508484632,
"signal/frontier_coverage_5/centered_abs_mean": 0.1474618911743164,
"signal/frontier_coverage_5/group_bin_occupancy": 0.85546875,
"signal/frontier_coverage_5/group_std_mean": 0.18895908445119858,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026395675959065557,
"signal/frontier_coverage_5/weight": 0.017899999395012856,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026395675959065557,
"signal/frontier_ece_reward/centered_abs_mean": 0.004972347756847739,
"signal/frontier_ece_reward/group_bin_occupancy": 0.9130859375,
"signal/frontier_ece_reward/group_std_mean": 0.006475380156189203,
"signal/frontier_ece_reward/group_zero_std_frac": 0.0,
"signal/frontier_ece_reward/scaled_weighted_centered_abs_mean": 0.0006215434696059674,
"signal/frontier_ece_reward/weight": 0.125,
"signal/frontier_ece_reward/weighted_centered_abs_mean": 0.0006215434696059674,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2751055657863617,
"signal/frontier_entropy_batch_reward/group_bin_occupancy": 0.712890625,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3516087681055069,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.03438819572329521,
"signal/frontier_entropy_batch_reward/weight": 0.125,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03438819572329521,
"step": 312,
"total_flos": 0.0,
"train_loss": 0.0041191707219135985,
"train_runtime": 59190.2375,
"train_samples_per_second": 0.338,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1055012517,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}