Files
RLCR-v4-ks-uniqueness-cov0-…/trainer_state.json
ModelHub XC 148151b6c0 初始化项目,由ModelHub XC社区提供模型
Model: hector-gr/RLCR-v4-ks-uniqueness-cov0-entropy100-noece-noaurc-scaletrue-hotpot
Source: Original Platform
2026-04-21 18:25:51 +08:00

8566 lines
527 KiB
JSON

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 50,
"global_step": 312,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"calibration/aurc": 0.6339422860552735,
"calibration/batch_distribution_entropy": 0.6446441729443816,
"calibration/confidence_entropy": 0.3458862984235007,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4998652652500846,
"calibration/mean_confidence": 0.790780918036414,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03681640625,
"completions/max_length": 1490.6,
"completions/max_terminated_length": 1490.6,
"completions/mean_length": 215.999609375,
"completions/mean_terminated_length": 224.2462951660156,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.016,
"grad_norm": 0.12977811694145203,
"learning_rate": 3.1249999999999997e-07,
"loss": 0.017,
"num_tokens": 17055868.0,
"reward": 0.500128197669983,
"reward_std": 0.3792427361011505,
"rewards/accuracy_reward": 0.2193359375,
"rewards/brier_reward": 0.3712968111038208,
"rewards/confidence_uniqueness_reward": 0.4833169639110565,
"rewards/format_reward": 0.6802734375,
"rewards/frontier_coverage_0": 0.29783164262771605,
"rewards/frontier_coverage_1": 0.29783164262771605,
"rewards/frontier_coverage_10": 0.29783164262771605,
"rewards/frontier_coverage_15": 0.29783164262771605,
"rewards/frontier_coverage_20": 0.29783164262771605,
"rewards/frontier_coverage_25": 0.29783164262771605,
"rewards/frontier_coverage_5": 0.29783164262771605,
"rewards/frontier_entropy_batch_reward": -0.649508249759674,
"signal/accuracy_reward/centered_abs_mean": 0.24100341796875,
"signal/accuracy_reward/group_std_mean": 0.2807555437088013,
"signal/accuracy_reward/group_zero_std_frac": 0.33125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.34670318365097047,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.120501708984375,
"signal/advantage_abs_mean": 0.8450873494148254,
"signal/advantage_pre_scale_abs_mean": 0.32363836765289306,
"signal/advantage_pre_scale_std": 0.3882134258747101,
"signal/advantage_std": 0.9842045307159424,
"signal/brier_reward/centered_abs_mean": 0.31905131340026854,
"signal/brier_reward/group_std_mean": 0.36378265619277955,
"signal/brier_reward/group_zero_std_frac": 0.003125,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09252839237451553,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.03190513178706169,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.2979742467403412,
"signal/confidence_uniqueness_reward/group_std_mean": 0.3481938362121582,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08685077279806137,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029797424748539924,
"signal/format_reward/centered_abs_mean": 0.40498046875,
"signal/format_reward/group_std_mean": 0.4544346511363983,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5899764060974121,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.202490234375,
"signal/frontier_coverage_0/centered_abs_mean": 0.29145163893699644,
"signal/frontier_coverage_0/group_std_mean": 0.3415479838848114,
"signal/frontier_coverage_0/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012060248106718064,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004167758347466588,
"signal/frontier_coverage_1/centered_abs_mean": 0.29145163893699644,
"signal/frontier_coverage_1/group_std_mean": 0.3415479838848114,
"signal/frontier_coverage_1/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012060248106718064,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004167758347466588,
"signal/frontier_coverage_10/centered_abs_mean": 0.29145163893699644,
"signal/frontier_coverage_10/group_std_mean": 0.3415479838848114,
"signal/frontier_coverage_10/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012060248106718064,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004167758347466588,
"signal/frontier_coverage_15/centered_abs_mean": 0.29145163893699644,
"signal/frontier_coverage_15/group_std_mean": 0.3415479838848114,
"signal/frontier_coverage_15/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012060248106718064,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004167758347466588,
"signal/frontier_coverage_20/centered_abs_mean": 0.29145163893699644,
"signal/frontier_coverage_20/group_std_mean": 0.3415479838848114,
"signal/frontier_coverage_20/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012060248106718064,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004167758347466588,
"signal/frontier_coverage_25/centered_abs_mean": 0.29145163893699644,
"signal/frontier_coverage_25/group_std_mean": 0.3415479838848114,
"signal/frontier_coverage_25/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012060248106718064,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004167758347466588,
"signal/frontier_coverage_5/centered_abs_mean": 0.29145163893699644,
"signal/frontier_coverage_5/group_std_mean": 0.3415479838848114,
"signal/frontier_coverage_5/group_zero_std_frac": 0.003125,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012060248106718064,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004167758347466588,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42818194031715395,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.47405582666397095,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12476505488157272,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0428181953728199,
"step": 5
},
{
"calibration/aurc": 0.6660608772094376,
"calibration/batch_distribution_entropy": 0.6544913229610491,
"calibration/confidence_entropy": 0.33948615354690503,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.5208878541612636,
"calibration/mean_confidence": 0.7934546409463656,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.03876953125,
"completions/max_length": 1502.0,
"completions/max_terminated_length": 1502.0,
"completions/mean_length": 202.78828125,
"completions/mean_terminated_length": 210.9924285888672,
"completions/min_length": 0.0,
"completions/min_terminated_length": 2.0,
"epoch": 0.032,
"grad_norm": 0.13490329682826996,
"learning_rate": 6.249999999999999e-07,
"loss": 0.0069,
"num_tokens": 34232772.0,
"reward": 0.5240197837352752,
"reward_std": 0.35646448731422425,
"rewards/accuracy_reward": 0.21396484375,
"rewards/brier_reward": 0.38541473150253297,
"rewards/confidence_uniqueness_reward": 0.5249821066856384,
"rewards/format_reward": 0.72978515625,
"rewards/frontier_coverage_0": 0.3038561224937439,
"rewards/frontier_coverage_1": 0.3038561224937439,
"rewards/frontier_coverage_10": 0.3038561224937439,
"rewards/frontier_coverage_15": 0.3038561224937439,
"rewards/frontier_coverage_20": 0.3038561224937439,
"rewards/frontier_coverage_25": 0.3038561224937439,
"rewards/frontier_coverage_5": 0.3038561224937439,
"rewards/frontier_entropy_batch_reward": -0.6931091666221618,
"signal/accuracy_reward/centered_abs_mean": 0.223748779296875,
"signal/accuracy_reward/group_std_mean": 0.26854496598243716,
"signal/accuracy_reward/group_zero_std_frac": 0.33125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3456988275051117,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.1118743896484375,
"signal/advantage_abs_mean": 0.8104079842567444,
"signal/advantage_pre_scale_abs_mean": 0.293930447101593,
"signal/advantage_pre_scale_std": 0.3660040318965912,
"signal/advantage_std": 0.9841862320899963,
"signal/brier_reward/centered_abs_mean": 0.30312992334365846,
"signal/brier_reward/group_std_mean": 0.3520526349544525,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09381168931722642,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.030312991887331008,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.27632507085800173,
"signal/confidence_uniqueness_reward/group_std_mean": 0.33531762957572936,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08543038666248322,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027632508799433707,
"signal/format_reward/centered_abs_mean": 0.362567138671875,
"signal/format_reward/group_std_mean": 0.4284077942371368,
"signal/format_reward/group_zero_std_frac": 0.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.5599912583827973,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.1812835693359375,
"signal/frontier_coverage_0/centered_abs_mean": 0.27730215191841123,
"signal/frontier_coverage_0/group_std_mean": 0.331596827507019,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012270637229084969,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003965420834720135,
"signal/frontier_coverage_1/centered_abs_mean": 0.27730215191841123,
"signal/frontier_coverage_1/group_std_mean": 0.331596827507019,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012270637229084969,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003965420834720135,
"signal/frontier_coverage_10/centered_abs_mean": 0.27730215191841123,
"signal/frontier_coverage_10/group_std_mean": 0.331596827507019,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012270637229084969,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003965420834720135,
"signal/frontier_coverage_15/centered_abs_mean": 0.27730215191841123,
"signal/frontier_coverage_15/group_std_mean": 0.331596827507019,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012270637229084969,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003965420834720135,
"signal/frontier_coverage_20/centered_abs_mean": 0.27730215191841123,
"signal/frontier_coverage_20/group_std_mean": 0.331596827507019,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012270637229084969,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003965420834720135,
"signal/frontier_coverage_25/centered_abs_mean": 0.27730215191841123,
"signal/frontier_coverage_25/group_std_mean": 0.331596827507019,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012270637229084969,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003965420834720135,
"signal/frontier_coverage_5/centered_abs_mean": 0.27730215191841123,
"signal/frontier_coverage_5/group_std_mean": 0.331596827507019,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012270637229084969,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003965420834720135,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39430789947509765,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4540126621723175,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12191563993692398,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0394307903945446,
"step": 10
},
{
"calibration/aurc": 0.5802905987854532,
"calibration/batch_distribution_entropy": 0.6451810079102698,
"calibration/buffer_distribution_entropy": 0.6653544222673999,
"calibration/confidence_entropy": 0.34505604458967376,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.4489973757469999,
"calibration/mean_confidence": 0.8003520858627509,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0158203125,
"completions/max_length": 1489.8,
"completions/max_terminated_length": 1489.8,
"completions/mean_length": 167.04140625,
"completions/mean_terminated_length": 169.87072143554687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 14.8,
"epoch": 0.048,
"grad_norm": 0.06792226433753967,
"learning_rate": 9.374999999999999e-07,
"loss": 0.0051,
"num_tokens": 50992012.0,
"reward": 0.6604457139968872,
"reward_std": 0.26606449782848357,
"rewards/accuracy_reward": 0.290234375,
"rewards/brier_reward": 0.5046543598175048,
"rewards/confidence_uniqueness_reward": 0.6614177465438843,
"rewards/format_reward": 0.9013671875,
"rewards/frontier_coverage_0": 0.3303596466779709,
"rewards/frontier_coverage_1": 0.3303596466779709,
"rewards/frontier_coverage_10": 0.3303596466779709,
"rewards/frontier_coverage_15": 0.3303596466779709,
"rewards/frontier_coverage_20": 0.3303596466779709,
"rewards/frontier_coverage_25": 0.3303596466779709,
"rewards/frontier_coverage_5": 0.3303596466779709,
"rewards/frontier_entropy_batch_reward": -0.8503130555152894,
"signal/accuracy_reward/centered_abs_mean": 0.1984130859375,
"signal/accuracy_reward/group_std_mean": 0.24557192921638488,
"signal/accuracy_reward/group_zero_std_frac": 0.365625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.4712305724620819,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.09920654296875,
"signal/advantage_abs_mean": 0.7059193015098572,
"signal/advantage_pre_scale_abs_mean": 0.19891976118087767,
"signal/advantage_pre_scale_std": 0.2803692609071732,
"signal/advantage_std": 0.9840129852294922,
"signal/brier_reward/centered_abs_mean": 0.26394283175468447,
"signal/brier_reward/group_std_mean": 0.3196195185184479,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12516716569662095,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02639428377151489,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.1863771289587021,
"signal/confidence_uniqueness_reward/group_std_mean": 0.24599670469760895,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08663659989833832,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01863771304488182,
"signal/format_reward/centered_abs_mean": 0.16243896484375,
"signal/format_reward/group_std_mean": 0.25803537368774415,
"signal/format_reward/group_zero_std_frac": 0.1375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.34426852166652677,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.081219482421875,
"signal/frontier_coverage_0/centered_abs_mean": 0.23436213433742523,
"signal/frontier_coverage_0/group_std_mean": 0.2903676062822342,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01507992073893547,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033513785572722556,
"signal/frontier_coverage_1/centered_abs_mean": 0.23436213433742523,
"signal/frontier_coverage_1/group_std_mean": 0.2903676062822342,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01507992073893547,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033513785572722556,
"signal/frontier_coverage_10/centered_abs_mean": 0.23436213433742523,
"signal/frontier_coverage_10/group_std_mean": 0.2903676062822342,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01507992073893547,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033513785572722556,
"signal/frontier_coverage_15/centered_abs_mean": 0.23436213433742523,
"signal/frontier_coverage_15/group_std_mean": 0.2903676062822342,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01507992073893547,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033513785572722556,
"signal/frontier_coverage_20/centered_abs_mean": 0.23436213433742523,
"signal/frontier_coverage_20/group_std_mean": 0.2903676062822342,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01507992073893547,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033513785572722556,
"signal/frontier_coverage_25/centered_abs_mean": 0.23436213433742523,
"signal/frontier_coverage_25/group_std_mean": 0.2903676062822342,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01507992073893547,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033513785572722556,
"signal/frontier_coverage_5/centered_abs_mean": 0.23436213433742523,
"signal/frontier_coverage_5/group_std_mean": 0.2903676062822342,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01507992073893547,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033513785572722556,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2380182147026062,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3486140549182892,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0375,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10810260623693466,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02380182184278965,
"step": 15
},
{
"calibration/aurc": 0.5322016670746652,
"calibration/batch_distribution_entropy": 0.7705823528313185,
"calibration/buffer_distribution_entropy": 0.6686908598810465,
"calibration/confidence_entropy": 0.4170525519014621,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0,
"calibration/coverage@20%": 0.0,
"calibration/coverage@25%": 0.0,
"calibration/coverage@30%": 0.0,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.32542858900510585,
"calibration/mean_confidence": 0.7265149808992029,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0044921875,
"completions/max_length": 1205.0,
"completions/max_terminated_length": 1205.0,
"completions/mean_length": 127.00986328125,
"completions/mean_terminated_length": 127.59269409179687,
"completions/min_length": 0.0,
"completions/min_terminated_length": 25.0,
"epoch": 0.064,
"grad_norm": 0.13832873106002808,
"learning_rate": 1e-06,
"loss": -0.0098,
"num_tokens": 67210993.0,
"reward": 0.7248212337493897,
"reward_std": 0.17984383106231688,
"rewards/accuracy_reward": 0.3447265625,
"rewards/brier_reward": 0.610313105583191,
"rewards/confidence_uniqueness_reward": 0.7996426224708557,
"rewards/format_reward": 0.9826171875,
"rewards/frontier_coverage_0": 0.07571766301989555,
"rewards/frontier_coverage_1": 0.07571766301989555,
"rewards/frontier_coverage_10": 0.07571766301989555,
"rewards/frontier_coverage_15": 0.07571766301989555,
"rewards/frontier_coverage_20": 0.07571766301989555,
"rewards/frontier_coverage_25": 0.07571766301989555,
"rewards/frontier_coverage_5": 0.07571766301989555,
"rewards/frontier_entropy_batch_reward": -0.8742559432983399,
"signal/accuracy_reward/centered_abs_mean": 0.193359375,
"signal/accuracy_reward/group_std_mean": 0.24748140275478364,
"signal/accuracy_reward/group_zero_std_frac": 0.328125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7142280459403991,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0966796875,
"signal/advantage_abs_mean": 0.7126465678215027,
"signal/advantage_pre_scale_abs_mean": 0.1337364584207535,
"signal/advantage_pre_scale_std": 0.1942312479019165,
"signal/advantage_std": 0.9837595462799072,
"signal/brier_reward/centered_abs_mean": 0.23327789902687074,
"signal/brier_reward/group_std_mean": 0.28798535466194153,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17303505837917327,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.023327790945768357,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.09244537949562073,
"signal/confidence_uniqueness_reward/group_std_mean": 0.13309455364942552,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06808455139398575,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009244537819176913,
"signal/format_reward/centered_abs_mean": 0.0329345703125,
"signal/format_reward/group_std_mean": 0.08150013014674187,
"signal/format_reward/group_zero_std_frac": 0.59375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.11886298581957817,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.01646728515625,
"signal/frontier_coverage_0/centered_abs_mean": 0.13106433302164078,
"signal/frontier_coverage_0/group_std_mean": 0.19403213858604432,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.014053609594702721,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001874219998717308,
"signal/frontier_coverage_1/centered_abs_mean": 0.13106433302164078,
"signal/frontier_coverage_1/group_std_mean": 0.19403213858604432,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.014053609594702721,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001874219998717308,
"signal/frontier_coverage_10/centered_abs_mean": 0.13106433302164078,
"signal/frontier_coverage_10/group_std_mean": 0.19403213858604432,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014053609594702721,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001874219998717308,
"signal/frontier_coverage_15/centered_abs_mean": 0.13106433302164078,
"signal/frontier_coverage_15/group_std_mean": 0.19403213858604432,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014053609594702721,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001874219998717308,
"signal/frontier_coverage_20/centered_abs_mean": 0.13106433302164078,
"signal/frontier_coverage_20/group_std_mean": 0.19403213858604432,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014053609594702721,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001874219998717308,
"signal/frontier_coverage_25/centered_abs_mean": 0.13106433302164078,
"signal/frontier_coverage_25/group_std_mean": 0.19403213858604432,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014053609594702721,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001874219998717308,
"signal/frontier_coverage_5/centered_abs_mean": 0.13106433302164078,
"signal/frontier_coverage_5/group_std_mean": 0.19403213858604432,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.014053609594702721,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001874219998717308,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20934711396694183,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3334401249885559,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.05,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.1583297297358513,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.020934711396694183,
"step": 20
},
{
"calibration/aurc": 0.6791233532497524,
"calibration/batch_distribution_entropy": 0.9538421546715095,
"calibration/buffer_distribution_entropy": 0.7588070364798357,
"calibration/confidence_entropy": 0.5099153307060644,
"calibration/coverage@0%": 0.001171875,
"calibration/coverage@1%": 0.001171875,
"calibration/coverage@10%": 0.001171875,
"calibration/coverage@15%": 0.001171875,
"calibration/coverage@20%": 0.001171875,
"calibration/coverage@25%": 0.0015625,
"calibration/coverage@30%": 0.0015625,
"calibration/coverage@5%": 0.001171875,
"calibration/ece": 0.27117672410782256,
"calibration/mean_confidence": 0.47369882184296996,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00244140625,
"completions/max_length": 995.0,
"completions/max_terminated_length": 995.0,
"completions/mean_length": 104.07744140625,
"completions/mean_terminated_length": 104.32813873291016,
"completions/min_length": 0.0,
"completions/min_terminated_length": 33.8,
"epoch": 0.08,
"grad_norm": 0.060792941600084305,
"learning_rate": 1e-06,
"loss": -0.0061,
"num_tokens": 83209898.0,
"reward": 0.7957952499389649,
"reward_std": 0.1482664555311203,
"rewards/accuracy_reward": 0.34365234375,
"rewards/brier_reward": 0.7017360806465149,
"rewards/confidence_uniqueness_reward": 0.9248051285743714,
"rewards/format_reward": 0.9947265625,
"rewards/frontier_coverage_0": 0.13328106552362443,
"rewards/frontier_coverage_1": 0.13328106552362443,
"rewards/frontier_coverage_10": 0.13328106552362443,
"rewards/frontier_coverage_15": 0.13328106552362443,
"rewards/frontier_coverage_20": 0.13328106552362443,
"rewards/frontier_coverage_25": 0.13328106552362443,
"rewards/frontier_coverage_5": 0.13328106552362443,
"rewards/frontier_entropy_batch_reward": -0.4938975155353546,
"signal/accuracy_reward/centered_abs_mean": 0.192559814453125,
"signal/accuracy_reward/group_std_mean": 0.2381644457578659,
"signal/accuracy_reward/group_zero_std_frac": 0.384375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.871105182170868,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0962799072265625,
"signal/advantage_abs_mean": 0.769687807559967,
"signal/advantage_pre_scale_abs_mean": 0.11523678749799729,
"signal/advantage_pre_scale_std": 0.1617472231388092,
"signal/advantage_std": 0.983604621887207,
"signal/brier_reward/centered_abs_mean": 0.21707654893398284,
"signal/brier_reward/group_std_mean": 0.2683330178260803,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19545693695545197,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02170765623450279,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.04447389058768749,
"signal/confidence_uniqueness_reward/group_std_mean": 0.06878753378987312,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03905631639063358,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004447389068081975,
"signal/format_reward/centered_abs_mean": 0.01019287109375,
"signal/format_reward/group_std_mean": 0.02915844917297363,
"signal/format_reward/group_zero_std_frac": 0.8375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.04567938521504402,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.005096435546875,
"signal/frontier_coverage_0/centered_abs_mean": 0.2647977530956268,
"signal/frontier_coverage_0/group_std_mean": 0.3346329748630524,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03465293869376183,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037866079248487948,
"signal/frontier_coverage_1/centered_abs_mean": 0.2647977530956268,
"signal/frontier_coverage_1/group_std_mean": 0.3346329748630524,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03465293869376183,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037866079248487948,
"signal/frontier_coverage_10/centered_abs_mean": 0.2647977530956268,
"signal/frontier_coverage_10/group_std_mean": 0.3346329748630524,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03465293869376183,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037866079248487948,
"signal/frontier_coverage_15/centered_abs_mean": 0.2647977530956268,
"signal/frontier_coverage_15/group_std_mean": 0.3346329748630524,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03465293869376183,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037866079248487948,
"signal/frontier_coverage_20/centered_abs_mean": 0.2647977530956268,
"signal/frontier_coverage_20/group_std_mean": 0.3346329748630524,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03465293869376183,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037866079248487948,
"signal/frontier_coverage_25/centered_abs_mean": 0.2647977530956268,
"signal/frontier_coverage_25/group_std_mean": 0.3346329748630524,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03465293869376183,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037866079248487948,
"signal/frontier_coverage_5/centered_abs_mean": 0.2647977530956268,
"signal/frontier_coverage_5/group_std_mean": 0.3346329748630524,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03465293869376183,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037866079248487948,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44679933190345766,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5179579973220825,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40088812708854676,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044679935276508334,
"step": 25
},
{
"calibration/aurc": 0.6576899380734184,
"calibration/batch_distribution_entropy": 0.8272113669921964,
"calibration/buffer_distribution_entropy": 0.8702624531424125,
"calibration/confidence_entropy": 0.44559956564740794,
"calibration/coverage@0%": 0.000390625,
"calibration/coverage@1%": 0.000390625,
"calibration/coverage@10%": 0.000390625,
"calibration/coverage@15%": 0.000390625,
"calibration/coverage@20%": 0.000390625,
"calibration/coverage@25%": 0.000390625,
"calibration/coverage@30%": 0.000390625,
"calibration/coverage@5%": 0.000390625,
"calibration/ece": 0.17525078401977626,
"calibration/mean_confidence": 0.2572908789952067,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.002734375,
"completions/max_length": 1001.0,
"completions/max_terminated_length": 1001.0,
"completions/mean_length": 96.93759765625,
"completions/mean_terminated_length": 97.20596618652344,
"completions/min_length": 0.0,
"completions/min_terminated_length": 37.6,
"epoch": 0.096,
"grad_norm": 0.03620428219437599,
"learning_rate": 1e-06,
"loss": -0.0134,
"num_tokens": 99247147.0,
"reward": 0.7996835231781005,
"reward_std": 0.1173510953783989,
"rewards/accuracy_reward": 0.34423828125,
"rewards/brier_reward": 0.7268486857414246,
"rewards/confidence_uniqueness_reward": 0.9211776852607727,
"rewards/format_reward": 0.9951171875,
"rewards/frontier_coverage_0": 0.1958144187927246,
"rewards/frontier_coverage_1": 0.1958144187927246,
"rewards/frontier_coverage_10": 0.1958144187927246,
"rewards/frontier_coverage_15": 0.1958144187927246,
"rewards/frontier_coverage_20": 0.1958144187927246,
"rewards/frontier_coverage_25": 0.1958144187927246,
"rewards/frontier_coverage_5": 0.1958144187927246,
"rewards/frontier_entropy_batch_reward": -0.5439786970615387,
"signal/accuracy_reward/centered_abs_mean": 0.184832763671875,
"signal/accuracy_reward/group_std_mean": 0.2294948309659958,
"signal/accuracy_reward/group_zero_std_frac": 0.396875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.3462110042572022,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0924163818359375,
"signal/advantage_abs_mean": 0.7473811984062195,
"signal/advantage_pre_scale_abs_mean": 0.08806595504283905,
"signal/advantage_pre_scale_std": 0.13366734385490417,
"signal/advantage_std": 0.9830494165420532,
"signal/brier_reward/centered_abs_mean": 0.19736847281455994,
"signal/brier_reward/group_std_mean": 0.24908939003944397,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.288382089138031,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.019736847281455992,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.03614585101604462,
"signal/confidence_uniqueness_reward/group_std_mean": 0.05685581639409065,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.053042204678058626,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003614585101604462,
"signal/format_reward/centered_abs_mean": 0.0093994140625,
"signal/format_reward/group_std_mean": 0.02593981511890888,
"signal/format_reward/group_zero_std_frac": 0.859375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0692012570798397,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00469970703125,
"signal/frontier_coverage_0/centered_abs_mean": 0.3461109459400177,
"signal/frontier_coverage_0/group_std_mean": 0.42214337587356565,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.07239173352718353,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0049493865109980105,
"signal/frontier_coverage_1/centered_abs_mean": 0.3461109459400177,
"signal/frontier_coverage_1/group_std_mean": 0.42214337587356565,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.07239173352718353,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0049493865109980105,
"signal/frontier_coverage_10/centered_abs_mean": 0.3461109459400177,
"signal/frontier_coverage_10/group_std_mean": 0.42214337587356565,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.07239173352718353,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0049493865109980105,
"signal/frontier_coverage_15/centered_abs_mean": 0.3461109459400177,
"signal/frontier_coverage_15/group_std_mean": 0.42214337587356565,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.07239173352718353,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0049493865109980105,
"signal/frontier_coverage_20/centered_abs_mean": 0.3461109459400177,
"signal/frontier_coverage_20/group_std_mean": 0.42214337587356565,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.07239173352718353,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0049493865109980105,
"signal/frontier_coverage_25/centered_abs_mean": 0.3461109459400177,
"signal/frontier_coverage_25/group_std_mean": 0.42214337587356565,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.07239173352718353,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0049493865109980105,
"signal/frontier_coverage_5/centered_abs_mean": 0.3461109459400177,
"signal/frontier_coverage_5/group_std_mean": 0.42214337587356565,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.07239173352718353,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0049493865109980105,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4473235845565796,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.5099922716617584,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6549581050872803,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044732359051704404,
"step": 30
},
{
"calibration/aurc": 0.5592464061427573,
"calibration/batch_distribution_entropy": 0.9734186029823684,
"calibration/buffer_distribution_entropy": 0.9249716187963466,
"calibration/confidence_entropy": 0.5295920857549896,
"calibration/coverage@0%": 0.00078125,
"calibration/coverage@1%": 0.00078125,
"calibration/coverage@10%": 0.00078125,
"calibration/coverage@15%": 0.00078125,
"calibration/coverage@20%": 0.001953125,
"calibration/coverage@25%": 0.0035217524509803923,
"calibration/coverage@30%": 0.005474877450980392,
"calibration/coverage@5%": 0.00078125,
"calibration/ece": 0.229060973623976,
"calibration/mean_confidence": 0.43376112419377993,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00126953125,
"completions/max_length": 663.4,
"completions/max_terminated_length": 663.4,
"completions/mean_length": 94.01142578125,
"completions/mean_terminated_length": 94.13132019042969,
"completions/min_length": 7.6,
"completions/min_terminated_length": 40.0,
"epoch": 0.112,
"grad_norm": 0.016059977933764458,
"learning_rate": 1e-06,
"loss": -0.0048,
"num_tokens": 115319296.0,
"reward": 0.855149245262146,
"reward_std": 0.13141493052244185,
"rewards/accuracy_reward": 0.41474609375,
"rewards/brier_reward": 0.6991081237792969,
"rewards/confidence_uniqueness_reward": 0.9526235222816467,
"rewards/format_reward": 0.99775390625,
"rewards/frontier_coverage_0": 0.07727691400796174,
"rewards/frontier_coverage_1": 0.07727691400796174,
"rewards/frontier_coverage_10": 0.07727691400796174,
"rewards/frontier_coverage_15": 0.07727691400796174,
"rewards/frontier_coverage_20": 0.07727691400796174,
"rewards/frontier_coverage_25": 0.07727691400796174,
"rewards/frontier_coverage_5": 0.07727691400796174,
"rewards/frontier_entropy_batch_reward": -0.24009357690811156,
"signal/accuracy_reward/centered_abs_mean": 0.181622314453125,
"signal/accuracy_reward/group_std_mean": 0.23383015990257264,
"signal/accuracy_reward/group_zero_std_frac": 0.353125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9329653382301331,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0908111572265625,
"signal/advantage_abs_mean": 0.7803734302520752,
"signal/advantage_pre_scale_abs_mean": 0.10330383628606796,
"signal/advantage_pre_scale_std": 0.14460791051387786,
"signal/advantage_std": 0.9834746479988098,
"signal/brier_reward/centered_abs_mean": 0.21031469106674194,
"signal/brier_reward/group_std_mean": 0.2588426500558853,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.217045795917511,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.021031468734145166,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01818895637989044,
"signal/confidence_uniqueness_reward/group_std_mean": 0.029530685395002365,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01874598637223244,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018188956892117858,
"signal/format_reward/centered_abs_mean": 0.004351806640625,
"signal/format_reward/group_std_mean": 0.012705824710428715,
"signal/format_reward/group_zero_std_frac": 0.928125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.022282978147268297,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0021759033203125,
"signal/frontier_coverage_0/centered_abs_mean": 0.2851457536220551,
"signal/frontier_coverage_0/group_std_mean": 0.3561969459056854,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.042065325379371646,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00407758429646492,
"signal/frontier_coverage_1/centered_abs_mean": 0.2851457536220551,
"signal/frontier_coverage_1/group_std_mean": 0.3561969459056854,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.042065325379371646,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00407758429646492,
"signal/frontier_coverage_10/centered_abs_mean": 0.2851457536220551,
"signal/frontier_coverage_10/group_std_mean": 0.3561969459056854,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.042065325379371646,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00407758429646492,
"signal/frontier_coverage_15/centered_abs_mean": 0.2851457536220551,
"signal/frontier_coverage_15/group_std_mean": 0.3561969459056854,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.042065325379371646,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00407758429646492,
"signal/frontier_coverage_20/centered_abs_mean": 0.2851457536220551,
"signal/frontier_coverage_20/group_std_mean": 0.3561969459056854,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.042065325379371646,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00407758429646492,
"signal/frontier_coverage_25/centered_abs_mean": 0.2851457536220551,
"signal/frontier_coverage_25/group_std_mean": 0.3561969459056854,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.042065325379371646,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00407758429646492,
"signal/frontier_coverage_5/centered_abs_mean": 0.2851457536220551,
"signal/frontier_coverage_5/group_std_mean": 0.3561969459056854,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.042065325379371646,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00407758429646492,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3295664429664612,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.4042933166027069,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.34022077918052673,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032956644892692566,
"step": 35
},
{
"calibration/aurc": 0.5485888003105004,
"calibration/batch_distribution_entropy": 0.9583806014639318,
"calibration/buffer_distribution_entropy": 0.9500481098032335,
"calibration/confidence_entropy": 0.4826562493346168,
"calibration/coverage@0%": 0.001960016883465715,
"calibration/coverage@1%": 0.001960016883465715,
"calibration/coverage@10%": 0.004312958059936303,
"calibration/coverage@15%": 0.004312958059936303,
"calibration/coverage@20%": 0.004312958059936303,
"calibration/coverage@25%": 0.00822532951536779,
"calibration/coverage@30%": 0.01018611382909328,
"calibration/coverage@5%": 0.001960016883465715,
"calibration/ece": 0.2487835388596178,
"calibration/mean_confidence": 0.37875214633636106,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 681.4,
"completions/max_terminated_length": 681.4,
"completions/mean_length": 98.33955078125,
"completions/mean_terminated_length": 98.4450439453125,
"completions/min_length": 8.0,
"completions/min_terminated_length": 41.2,
"epoch": 0.128,
"grad_norm": 0.013107044622302055,
"learning_rate": 1e-06,
"loss": -0.0014,
"num_tokens": 131242965.0,
"reward": 0.857842743396759,
"reward_std": 0.11681393682956695,
"rewards/accuracy_reward": 0.4255859375,
"rewards/brier_reward": 0.6927406430244446,
"rewards/confidence_uniqueness_reward": 0.9548697948455811,
"rewards/format_reward": 0.99853515625,
"rewards/frontier_coverage_0": 0.08065508380532264,
"rewards/frontier_coverage_1": 0.08065508380532264,
"rewards/frontier_coverage_10": 0.08065508380532264,
"rewards/frontier_coverage_15": 0.08065508380532264,
"rewards/frontier_coverage_20": 0.08065508380532264,
"rewards/frontier_coverage_25": 0.08065508380532264,
"rewards/frontier_coverage_5": 0.08065508380532264,
"rewards/frontier_entropy_batch_reward": -0.2705242335796356,
"signal/accuracy_reward/centered_abs_mean": 0.1486083984375,
"signal/accuracy_reward/group_std_mean": 0.19626019299030303,
"signal/accuracy_reward/group_zero_std_frac": 0.44375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8739246845245361,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.07430419921875,
"signal/advantage_abs_mean": 0.7811145305633544,
"signal/advantage_pre_scale_abs_mean": 0.09206330478191375,
"signal/advantage_pre_scale_std": 0.13003001511096954,
"signal/advantage_std": 0.9833110690116882,
"signal/brier_reward/centered_abs_mean": 0.22610692977905272,
"signal/brier_reward/group_std_mean": 0.2758404791355133,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.27061591744422914,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02261069305241108,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.021954387053847314,
"signal/confidence_uniqueness_reward/group_std_mean": 0.030994601547718048,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026323718205094337,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002195438789203763,
"signal/format_reward/centered_abs_mean": 0.002801513671875,
"signal/format_reward/group_std_mean": 0.0072774821892380714,
"signal/format_reward/group_zero_std_frac": 0.9625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.01556429360061884,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0014007568359375,
"signal/frontier_coverage_0/centered_abs_mean": 0.30873937606811525,
"signal/frontier_coverage_0/group_std_mean": 0.37858131527900696,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.05287817344069481,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00441497303545475,
"signal/frontier_coverage_1/centered_abs_mean": 0.30873937606811525,
"signal/frontier_coverage_1/group_std_mean": 0.37858131527900696,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.05287817344069481,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00441497303545475,
"signal/frontier_coverage_10/centered_abs_mean": 0.30873937606811525,
"signal/frontier_coverage_10/group_std_mean": 0.37858131527900696,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.05287817344069481,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00441497303545475,
"signal/frontier_coverage_15/centered_abs_mean": 0.30873937606811525,
"signal/frontier_coverage_15/group_std_mean": 0.37858131527900696,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.05287817344069481,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00441497303545475,
"signal/frontier_coverage_20/centered_abs_mean": 0.30873937606811525,
"signal/frontier_coverage_20/group_std_mean": 0.37858131527900696,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.05287817344069481,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00441497303545475,
"signal/frontier_coverage_25/centered_abs_mean": 0.30873937606811525,
"signal/frontier_coverage_25/group_std_mean": 0.37858131527900696,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.05287817344069481,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00441497303545475,
"signal/frontier_coverage_5/centered_abs_mean": 0.30873937606811525,
"signal/frontier_coverage_5/group_std_mean": 0.37858131527900696,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.05287817344069481,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00441497303545475,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3557403266429901,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.426521098613739,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42853416204452516,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035574034601449964,
"step": 40
},
{
"calibration/aurc": 0.40409018226935123,
"calibration/batch_distribution_entropy": 0.975256420237604,
"calibration/buffer_distribution_entropy": 0.9636264971133013,
"calibration/confidence_entropy": 0.5030471631787824,
"calibration/coverage@0%": 0.001171875,
"calibration/coverage@1%": 0.001171875,
"calibration/coverage@10%": 0.001171875,
"calibration/coverage@15%": 0.001171875,
"calibration/coverage@20%": 0.13203583659491194,
"calibration/coverage@25%": 0.20586931262230918,
"calibration/coverage@30%": 0.20586931262230918,
"calibration/coverage@5%": 0.001171875,
"calibration/ece": 0.2794678515618836,
"calibration/mean_confidence": 0.42314918125267065,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 686.0,
"completions/max_terminated_length": 686.0,
"completions/mean_length": 102.1638671875,
"completions/mean_terminated_length": 102.22365875244141,
"completions/min_length": 3.0,
"completions/min_terminated_length": 38.0,
"epoch": 0.144,
"grad_norm": 0.023353978991508484,
"learning_rate": 1e-06,
"loss": -0.0007,
"num_tokens": 147239555.0,
"reward": 0.9046274423599243,
"reward_std": 0.12026365250349044,
"rewards/accuracy_reward": 0.53173828125,
"rewards/brier_reward": 0.678732717037201,
"rewards/confidence_uniqueness_reward": 0.9554983496665954,
"rewards/format_reward": 0.99912109375,
"rewards/frontier_coverage_0": -0.005397527106106281,
"rewards/frontier_coverage_1": -0.005397527106106281,
"rewards/frontier_coverage_10": -0.005397527106106281,
"rewards/frontier_coverage_15": -0.005397527106106281,
"rewards/frontier_coverage_20": -0.005397527106106281,
"rewards/frontier_coverage_25": -0.005397527106106281,
"rewards/frontier_coverage_5": -0.005397527106106281,
"rewards/frontier_entropy_batch_reward": -0.23685037195682526,
"signal/accuracy_reward/centered_abs_mean": 0.150152587890625,
"signal/accuracy_reward/group_std_mean": 0.19809286296367645,
"signal/accuracy_reward/group_zero_std_frac": 0.4375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.830261766910553,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0750762939453125,
"signal/advantage_abs_mean": 0.7871874570846558,
"signal/advantage_pre_scale_abs_mean": 0.09534858167171478,
"signal/advantage_pre_scale_std": 0.13183027058839797,
"signal/advantage_std": 0.9833981156349182,
"signal/brier_reward/centered_abs_mean": 0.23030579090118408,
"signal/brier_reward/group_std_mean": 0.2796397864818573,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2559266179800034,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02303057983517647,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.020615693554282187,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02922433577477932,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02314589861780405,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002061569388024509,
"signal/format_reward/centered_abs_mean": 0.001702880859375,
"signal/format_reward/group_std_mean": 0.004971844423562288,
"signal/format_reward/group_zero_std_frac": 0.971875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009363159909844399,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875,
"signal/frontier_coverage_0/centered_abs_mean": 0.3010148942470551,
"signal/frontier_coverage_0/group_std_mean": 0.37279834151268004,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04789614900946617,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004304513148963451,
"signal/frontier_coverage_1/centered_abs_mean": 0.3010148942470551,
"signal/frontier_coverage_1/group_std_mean": 0.37279834151268004,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04789614900946617,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004304513148963451,
"signal/frontier_coverage_10/centered_abs_mean": 0.3010148942470551,
"signal/frontier_coverage_10/group_std_mean": 0.37279834151268004,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04789614900946617,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004304513148963451,
"signal/frontier_coverage_15/centered_abs_mean": 0.3010148942470551,
"signal/frontier_coverage_15/group_std_mean": 0.37279834151268004,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04789614900946617,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004304513148963451,
"signal/frontier_coverage_20/centered_abs_mean": 0.3010148942470551,
"signal/frontier_coverage_20/group_std_mean": 0.37279834151268004,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04789614900946617,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004304513148963451,
"signal/frontier_coverage_25/centered_abs_mean": 0.3010148942470551,
"signal/frontier_coverage_25/group_std_mean": 0.37279834151268004,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04789614900946617,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004304513148963451,
"signal/frontier_coverage_5/centered_abs_mean": 0.3010148942470551,
"signal/frontier_coverage_5/group_std_mean": 0.37279834151268004,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04789614900946617,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004304513148963451,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3241081744432449,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3993211805820465,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3629483371973038,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03241081647574902,
"step": 45
},
{
"calibration/aurc": 0.47796316607726597,
"calibration/batch_distribution_entropy": 0.9817986151177764,
"calibration/buffer_distribution_entropy": 0.9728652858416347,
"calibration/confidence_entropy": 0.530073439985547,
"calibration/coverage@0%": 0.001953125,
"calibration/coverage@1%": 0.001953125,
"calibration/coverage@10%": 0.001953125,
"calibration/coverage@15%": 0.001953125,
"calibration/coverage@20%": 0.008988197162426614,
"calibration/coverage@25%": 0.008988197162426614,
"calibration/coverage@30%": 0.012113197162426615,
"calibration/coverage@5%": 0.001953125,
"calibration/ece": 0.19193092135956252,
"calibration/mean_confidence": 0.5419844403512751,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0013671875,
"completions/max_length": 804.8,
"completions/max_terminated_length": 804.8,
"completions/mean_length": 111.23623046875,
"completions/mean_terminated_length": 111.38824768066407,
"completions/min_length": 0.0,
"completions/min_terminated_length": 46.4,
"epoch": 0.16,
"grad_norm": 0.02443806827068329,
"learning_rate": 1e-06,
"loss": -0.0073,
"num_tokens": 163399542.0,
"reward": 0.883971381187439,
"reward_std": 0.12701284289360046,
"rewards/accuracy_reward": 0.4640625,
"rewards/brier_reward": 0.705234956741333,
"rewards/confidence_uniqueness_reward": 0.955999755859375,
"rewards/format_reward": 0.9986328125,
"rewards/frontier_coverage_0": 0.048603178933262825,
"rewards/frontier_coverage_1": 0.048603178933262825,
"rewards/frontier_coverage_10": 0.048603178933262825,
"rewards/frontier_coverage_15": 0.048603178933262825,
"rewards/frontier_coverage_20": 0.048603178933262825,
"rewards/frontier_coverage_25": 0.048603178933262825,
"rewards/frontier_coverage_5": 0.048603178933262825,
"rewards/frontier_entropy_batch_reward": -0.18364944458007812,
"signal/accuracy_reward/centered_abs_mean": 0.14886474609375,
"signal/accuracy_reward/group_std_mean": 0.19415634870529175,
"signal/accuracy_reward/group_zero_std_frac": 0.4625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.781326687335968,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.074432373046875,
"signal/advantage_abs_mean": 0.7767141819000244,
"signal/advantage_pre_scale_abs_mean": 0.09897643923759461,
"signal/advantage_pre_scale_std": 0.14180286526679992,
"signal/advantage_std": 0.983447003364563,
"signal/brier_reward/centered_abs_mean": 0.21131813228130342,
"signal/brier_reward/group_std_mean": 0.2597563862800598,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2243928611278534,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.021131813526153564,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014774037152528762,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022573205083608626,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01570458896458149,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001477403729222715,
"signal/format_reward/centered_abs_mean": 0.00264892578125,
"signal/format_reward/group_std_mean": 0.007733980193734169,
"signal/format_reward/group_zero_std_frac": 0.95625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.014173118397593498,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001324462890625,
"signal/frontier_coverage_0/centered_abs_mean": 0.22484306693077089,
"signal/frontier_coverage_0/group_std_mean": 0.29105273485183714,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03399848416447639,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032152560073882342,
"signal/frontier_coverage_1/centered_abs_mean": 0.22484306693077089,
"signal/frontier_coverage_1/group_std_mean": 0.29105273485183714,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03399848416447639,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032152560073882342,
"signal/frontier_coverage_10/centered_abs_mean": 0.22484306693077089,
"signal/frontier_coverage_10/group_std_mean": 0.29105273485183714,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03399848416447639,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032152560073882342,
"signal/frontier_coverage_15/centered_abs_mean": 0.22484306693077089,
"signal/frontier_coverage_15/group_std_mean": 0.29105273485183714,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03399848416447639,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032152560073882342,
"signal/frontier_coverage_20/centered_abs_mean": 0.22484306693077089,
"signal/frontier_coverage_20/group_std_mean": 0.29105273485183714,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03399848416447639,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032152560073882342,
"signal/frontier_coverage_25/centered_abs_mean": 0.22484306693077089,
"signal/frontier_coverage_25/group_std_mean": 0.29105273485183714,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03399848416447639,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032152560073882342,
"signal/frontier_coverage_5/centered_abs_mean": 0.22484306693077089,
"signal/frontier_coverage_5/group_std_mean": 0.29105273485183714,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03399848416447639,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032152560073882342,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27477757930755614,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35714380741119384,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.29268457293510436,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02747775800526142,
"step": 50
},
{
"epoch": 0.16,
"eval_calibration/aurc": 0.5952072642536581,
"eval_calibration/batch_distribution_entropy": 0.9297086608557112,
"eval_calibration/buffer_distribution_entropy": 0.9766844148452357,
"eval_calibration/confidence_entropy": 0.5220518976892288,
"eval_calibration/coverage@0%": 0.03125,
"eval_calibration/coverage@1%": 0.03125,
"eval_calibration/coverage@10%": 0.03125,
"eval_calibration/coverage@15%": 0.03125,
"eval_calibration/coverage@20%": 0.03125,
"eval_calibration/coverage@25%": 0.0546875,
"eval_calibration/coverage@30%": 0.09375,
"eval_calibration/coverage@5%": 0.03125,
"eval_calibration/ece": 0.2956597683100823,
"eval_calibration/mean_confidence": 0.5218693570576916,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 282.25,
"eval_completions/max_terminated_length": 282.25,
"eval_completions/mean_length": 110.61812973022461,
"eval_completions/mean_terminated_length": 110.61812973022461,
"eval_completions/min_length": 54.5,
"eval_completions/min_terminated_length": 54.5,
"eval_loss": 0.0,
"eval_num_tokens": 163399542.0,
"eval_reward": 0.7699112445116043,
"eval_reward_std": 0.22320134565234184,
"eval_rewards/accuracy_reward": 0.396484375,
"eval_rewards/brier_reward": 0.7137555778026581,
"eval_rewards/confidence_uniqueness_reward": 0.896240234375,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_coverage_0": 0.10658804513514042,
"eval_rewards/frontier_coverage_1": 0.10658804513514042,
"eval_rewards/frontier_coverage_10": 0.10658804513514042,
"eval_rewards/frontier_coverage_15": 0.10658804513514042,
"eval_rewards/frontier_coverage_20": 0.10658804513514042,
"eval_rewards/frontier_coverage_25": 0.10658804513514042,
"eval_rewards/frontier_coverage_5": 0.10658804513514042,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 16.3101,
"eval_samples_per_second": 30.656,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4571533203125,
"eval_signal/accuracy_reward/group_std_mean": 0.4844451770186424,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0287865847349167,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22857666015625,
"eval_signal/advantage_abs_mean": 0.9094541817903519,
"eval_signal/advantage_pre_scale_abs_mean": 0.2041856087744236,
"eval_signal/advantage_pre_scale_std": 0.22104624286293983,
"eval_signal/advantage_std": 0.9876727759838104,
"eval_signal/brier_reward/centered_abs_mean": 0.22672728821635246,
"eval_signal/brier_reward/group_std_mean": 0.278080090880394,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10234775766730309,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.022672730032354593,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0434112548828125,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.052254452370107174,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019623446743935347,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004341125721111894,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.30074895173311234,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4004068300127983,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01945252064615488,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004300709872040898,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.30074895173311234,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4004068300127983,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01945252064615488,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004300709872040898,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.30074895173311234,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4004068300127983,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01945252064615488,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004300709872040898,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.30074895173311234,
"eval_signal/frontier_coverage_15/group_std_mean": 0.4004068300127983,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01945252064615488,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004300709872040898,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.30074895173311234,
"eval_signal/frontier_coverage_20/group_std_mean": 0.4004068300127983,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01945252064615488,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004300709872040898,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.30074895173311234,
"eval_signal/frontier_coverage_25/group_std_mean": 0.4004068300127983,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01945252064615488,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004300709872040898,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.30074895173311234,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4004068300127983,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01945252064615488,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004300709872040898,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.245,
"step": 50
},
{
"calibration/aurc": 0.4293310472623869,
"calibration/batch_distribution_entropy": 0.991187044559398,
"calibration/buffer_distribution_entropy": 0.9787306580823815,
"calibration/confidence_entropy": 0.5058137954460664,
"calibration/coverage@0%": 0.0,
"calibration/coverage@1%": 0.0,
"calibration/coverage@10%": 0.0,
"calibration/coverage@15%": 0.0027397260273972603,
"calibration/coverage@20%": 0.03790973581213307,
"calibration/coverage@25%": 0.04533390410958904,
"calibration/coverage@30%": 0.0867653803816047,
"calibration/coverage@5%": 0.0,
"calibration/ece": 0.19659801829272178,
"calibration/mean_confidence": 0.4781637847637869,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 531.0,
"completions/max_terminated_length": 531.0,
"completions/mean_length": 111.984375,
"completions/mean_terminated_length": 112.01715240478515,
"completions/min_length": 19.6,
"completions/min_terminated_length": 48.6,
"epoch": 0.176,
"grad_norm": 0.016006283462047577,
"learning_rate": 1e-06,
"loss": 0.0029,
"num_tokens": 179783382.0,
"reward": 0.8927610993385315,
"reward_std": 0.11393142342567444,
"rewards/accuracy_reward": 0.46640625,
"rewards/brier_reward": 0.7212023615837098,
"rewards/confidence_uniqueness_reward": 0.9581124544143677,
"rewards/format_reward": 0.999609375,
"rewards/frontier_coverage_0": 0.07742121592164039,
"rewards/frontier_coverage_1": 0.07742121592164039,
"rewards/frontier_coverage_10": 0.07742121592164039,
"rewards/frontier_coverage_15": 0.07742121592164039,
"rewards/frontier_coverage_20": 0.07742121592164039,
"rewards/frontier_coverage_25": 0.07742121592164039,
"rewards/frontier_coverage_5": 0.07742121592164039,
"rewards/frontier_entropy_batch_reward": -0.1592807114124298,
"signal/accuracy_reward/centered_abs_mean": 0.141259765625,
"signal/accuracy_reward/group_std_mean": 0.17928344309329985,
"signal/accuracy_reward/group_zero_std_frac": 0.5125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8460919618606567,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0706298828125,
"signal/advantage_abs_mean": 0.7873261332511902,
"signal/advantage_pre_scale_abs_mean": 0.09122534543275833,
"signal/advantage_pre_scale_std": 0.12952570170164107,
"signal/advantage_std": 0.9832993149757385,
"signal/brier_reward/centered_abs_mean": 0.20927453339099883,
"signal/brier_reward/group_std_mean": 0.2582443118095398,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2531407684087753,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.02092745341360569,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013171698711812497,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01759280003607273,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015911542251706122,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013171698665246367,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004652977641671896,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_coverage_0/centered_abs_mean": 0.25559466183185575,
"signal/frontier_coverage_0/group_std_mean": 0.3214977204799652,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04414609596133232,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003655003709718585,
"signal/frontier_coverage_1/centered_abs_mean": 0.25559466183185575,
"signal/frontier_coverage_1/group_std_mean": 0.3214977204799652,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04414609596133232,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003655003709718585,
"signal/frontier_coverage_10/centered_abs_mean": 0.25559466183185575,
"signal/frontier_coverage_10/group_std_mean": 0.3214977204799652,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04414609596133232,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003655003709718585,
"signal/frontier_coverage_15/centered_abs_mean": 0.25559466183185575,
"signal/frontier_coverage_15/group_std_mean": 0.3214977204799652,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04414609596133232,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003655003709718585,
"signal/frontier_coverage_20/centered_abs_mean": 0.25559466183185575,
"signal/frontier_coverage_20/group_std_mean": 0.3214977204799652,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04414609596133232,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003655003709718585,
"signal/frontier_coverage_25/centered_abs_mean": 0.25559466183185575,
"signal/frontier_coverage_25/group_std_mean": 0.3214977204799652,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04414609596133232,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003655003709718585,
"signal/frontier_coverage_5/centered_abs_mean": 0.25559466183185575,
"signal/frontier_coverage_5/group_std_mean": 0.3214977204799652,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04414609596133232,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003655003709718585,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2443944036960602,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3236381232738495,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.29559103548526766,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024439441785216332,
"step": 55
},
{
"calibration/aurc": 0.3614467503210282,
"calibration/batch_distribution_entropy": 0.9762620070934765,
"calibration/buffer_distribution_entropy": 0.982707586617636,
"calibration/confidence_entropy": 0.47250542952602925,
"calibration/coverage@0%": 0.0023483365949119373,
"calibration/coverage@1%": 0.0023483365949119373,
"calibration/coverage@10%": 0.014081610812133072,
"calibration/coverage@15%": 0.026194807974559687,
"calibration/coverage@20%": 0.059058524951076316,
"calibration/coverage@25%": 0.1892421416340509,
"calibration/coverage@30%": 0.30892398483365946,
"calibration/coverage@5%": 0.0023483365949119373,
"calibration/ece": 0.15341142419107695,
"calibration/mean_confidence": 0.43818721331033733,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 459.4,
"completions/max_terminated_length": 459.4,
"completions/mean_length": 112.08486328125,
"completions/mean_terminated_length": 112.14996032714843,
"completions/min_length": 10.0,
"completions/min_terminated_length": 48.6,
"epoch": 0.192,
"grad_norm": 0.02954723685979843,
"learning_rate": 1e-06,
"loss": -0.0022,
"num_tokens": 195745947.0,
"reward": 0.9055456638336181,
"reward_std": 0.11155757009983062,
"rewards/accuracy_reward": 0.497265625,
"rewards/brier_reward": 0.7332705736160279,
"rewards/confidence_uniqueness_reward": 0.9544866442680359,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_coverage_0": 0.08700279919430613,
"rewards/frontier_coverage_1": 0.08700279919430613,
"rewards/frontier_coverage_10": 0.08700279919430613,
"rewards/frontier_coverage_15": 0.08700279919430613,
"rewards/frontier_coverage_20": 0.08700279919430613,
"rewards/frontier_coverage_25": 0.08700279919430613,
"rewards/frontier_coverage_5": 0.08700279919430613,
"rewards/frontier_entropy_batch_reward": -0.20278894305229186,
"signal/accuracy_reward/centered_abs_mean": 0.139501953125,
"signal/accuracy_reward/group_std_mean": 0.18089237213134765,
"signal/accuracy_reward/group_zero_std_frac": 0.496875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9052997827529907,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0697509765625,
"signal/advantage_abs_mean": 0.775652015209198,
"signal/advantage_pre_scale_abs_mean": 0.08811791241168976,
"signal/advantage_pre_scale_std": 0.12764054387807847,
"signal/advantage_std": 0.983202064037323,
"signal/brier_reward/centered_abs_mean": 0.20351653397083283,
"signal/brier_reward/group_std_mean": 0.25207469165325164,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.26565858721733093,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.020351653546094896,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016574647277593613,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022703318297863005,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02189209684729576,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00165746477432549,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_std_mean": 0.0033145629800856113,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007327704038470983,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_coverage_0/centered_abs_mean": 0.2656306028366089,
"signal/frontier_coverage_0/group_std_mean": 0.33098281025886533,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04965348467230797,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003798517771065235,
"signal/frontier_coverage_1/centered_abs_mean": 0.2656306028366089,
"signal/frontier_coverage_1/group_std_mean": 0.33098281025886533,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04965348467230797,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003798517771065235,
"signal/frontier_coverage_10/centered_abs_mean": 0.2656306028366089,
"signal/frontier_coverage_10/group_std_mean": 0.33098281025886533,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04965348467230797,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003798517771065235,
"signal/frontier_coverage_15/centered_abs_mean": 0.2656306028366089,
"signal/frontier_coverage_15/group_std_mean": 0.33098281025886533,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04965348467230797,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003798517771065235,
"signal/frontier_coverage_20/centered_abs_mean": 0.2656306028366089,
"signal/frontier_coverage_20/group_std_mean": 0.33098281025886533,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04965348467230797,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003798517771065235,
"signal/frontier_coverage_25/centered_abs_mean": 0.2656306028366089,
"signal/frontier_coverage_25/group_std_mean": 0.33098281025886533,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04965348467230797,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003798517771065235,
"signal/frontier_coverage_5/centered_abs_mean": 0.2656306028366089,
"signal/frontier_coverage_5/group_std_mean": 0.33098281025886533,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04965348467230797,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003798517771065235,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28159146904945376,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3595281183719635,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3709623396396637,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02815914712846279,
"step": 60
},
{
"calibration/aurc": 0.30011243720009795,
"calibration/batch_distribution_entropy": 0.9771462317737031,
"calibration/buffer_distribution_entropy": 0.9847172441681387,
"calibration/confidence_entropy": 0.5078898292737076,
"calibration/coverage@0%": 0.02265701443248532,
"calibration/coverage@1%": 0.02265701443248532,
"calibration/coverage@10%": 0.1675788894324853,
"calibration/coverage@15%": 0.301340050146771,
"calibration/coverage@20%": 0.3647283206947162,
"calibration/coverage@25%": 0.428515625,
"calibration/coverage@30%": 0.47742554427592954,
"calibration/coverage@5%": 0.08320388943248533,
"calibration/ece": 0.17360716271690285,
"calibration/mean_confidence": 0.5349996273034741,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 443.2,
"completions/max_terminated_length": 443.2,
"completions/mean_length": 114.509765625,
"completions/mean_terminated_length": 114.54372100830078,
"completions/min_length": 20.6,
"completions/min_terminated_length": 52.0,
"epoch": 0.208,
"grad_norm": 0.03158510476350784,
"learning_rate": 1e-06,
"loss": -0.0023,
"num_tokens": 211950751.0,
"reward": 0.9300533056259155,
"reward_std": 0.10566971302032471,
"rewards/accuracy_reward": 0.54306640625,
"rewards/brier_reward": 0.7536995768547058,
"rewards/confidence_uniqueness_reward": 0.9566167235374451,
"rewards/format_reward": 0.999609375,
"rewards/frontier_coverage_0": 0.0559085650369525,
"rewards/frontier_coverage_1": 0.0559085650369525,
"rewards/frontier_coverage_10": 0.0559085650369525,
"rewards/frontier_coverage_15": 0.0559085650369525,
"rewards/frontier_coverage_20": 0.0559085650369525,
"rewards/frontier_coverage_25": 0.0559085650369525,
"rewards/frontier_coverage_5": 0.0559085650369525,
"rewards/frontier_entropy_batch_reward": -0.1791268080472946,
"signal/accuracy_reward/centered_abs_mean": 0.123236083984375,
"signal/accuracy_reward/group_std_mean": 0.16450151801109314,
"signal/accuracy_reward/group_zero_std_frac": 0.521875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8466056942939758,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0616180419921875,
"signal/advantage_abs_mean": 0.7682868838310242,
"signal/advantage_pre_scale_abs_mean": 0.08178776204586029,
"signal/advantage_pre_scale_std": 0.1228803813457489,
"signal/advantage_std": 0.9831345796585083,
"signal/brier_reward/centered_abs_mean": 0.1688483715057373,
"signal/brier_reward/group_std_mean": 0.2128828853368759,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23237936198711395,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.016884836927056314,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011919040419161319,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016283450275659563,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.016335343569517137,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011919040698558092,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005191143415868282,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_coverage_0/centered_abs_mean": 0.19982794523239136,
"signal/frontier_coverage_0/group_std_mean": 0.2555452287197113,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03945437371730805,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028575395699590445,
"signal/frontier_coverage_1/centered_abs_mean": 0.19982794523239136,
"signal/frontier_coverage_1/group_std_mean": 0.2555452287197113,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03945437371730805,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028575395699590445,
"signal/frontier_coverage_10/centered_abs_mean": 0.19982794523239136,
"signal/frontier_coverage_10/group_std_mean": 0.2555452287197113,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03945437371730805,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028575395699590445,
"signal/frontier_coverage_15/centered_abs_mean": 0.19982794523239136,
"signal/frontier_coverage_15/group_std_mean": 0.2555452287197113,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03945437371730805,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028575395699590445,
"signal/frontier_coverage_20/centered_abs_mean": 0.19982794523239136,
"signal/frontier_coverage_20/group_std_mean": 0.2555452287197113,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03945437371730805,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028575395699590445,
"signal/frontier_coverage_25/centered_abs_mean": 0.19982794523239136,
"signal/frontier_coverage_25/group_std_mean": 0.2555452287197113,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03945437371730805,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028575395699590445,
"signal/frontier_coverage_5/centered_abs_mean": 0.19982794523239136,
"signal/frontier_coverage_5/group_std_mean": 0.2555452287197113,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03945437371730805,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028575395699590445,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25831425189971924,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33621604442596437,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3541386485099792,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025831427052617074,
"step": 65
},
{
"calibration/aurc": 0.2777412552876487,
"calibration/batch_distribution_entropy": 0.9853763418017791,
"calibration/buffer_distribution_entropy": 0.9872255378108227,
"calibration/confidence_entropy": 0.49916279117895535,
"calibration/coverage@0%": 0.036328125,
"calibration/coverage@1%": 0.036328125,
"calibration/coverage@10%": 0.207421875,
"calibration/coverage@15%": 0.3203125,
"calibration/coverage@20%": 0.44375,
"calibration/coverage@25%": 0.530859375,
"calibration/coverage@30%": 0.6,
"calibration/coverage@5%": 0.108984375,
"calibration/ece": 0.15090117081731733,
"calibration/mean_confidence": 0.45539058067229243,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 460.0,
"completions/max_terminated_length": 460.0,
"completions/mean_length": 116.38125,
"completions/mean_terminated_length": 116.39248504638672,
"completions/min_length": 47.6,
"completions/min_terminated_length": 59.2,
"epoch": 0.224,
"grad_norm": 0.019556866958737373,
"learning_rate": 1e-06,
"loss": 0.0031,
"num_tokens": 228295679.0,
"reward": 0.91881822347641,
"reward_std": 0.08917870223522187,
"rewards/accuracy_reward": 0.502734375,
"rewards/brier_reward": 0.784402334690094,
"rewards/confidence_uniqueness_reward": 0.9537890195846558,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.12251347005367279,
"rewards/frontier_coverage_1": 0.12251347005367279,
"rewards/frontier_coverage_10": 0.12251347005367279,
"rewards/frontier_coverage_15": 0.12251347005367279,
"rewards/frontier_coverage_20": 0.12251347005367279,
"rewards/frontier_coverage_25": 0.12251347005367279,
"rewards/frontier_coverage_5": 0.12251347005367279,
"rewards/frontier_entropy_batch_reward": -0.18582858741283417,
"signal/accuracy_reward/centered_abs_mean": 0.10262451171875,
"signal/accuracy_reward/group_std_mean": 0.13685290068387984,
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8474323511123657,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.051312255859375,
"signal/advantage_abs_mean": 0.7694117426872253,
"signal/advantage_pre_scale_abs_mean": 0.06965965777635574,
"signal/advantage_pre_scale_std": 0.10694814324378968,
"signal/advantage_std": 0.9828300595283508,
"signal/brier_reward/centered_abs_mean": 0.14114340543746948,
"signal/brier_reward/group_std_mean": 0.18228788077831268,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23655705153942108,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.014114340580999852,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012035387381911278,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015354960411787032,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020498888939619063,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012035387801006437,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018597409129142762,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.19413280189037324,
"signal/frontier_coverage_0/group_std_mean": 0.2504078775644302,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.046854938566684726,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027760989032685757,
"signal/frontier_coverage_1/centered_abs_mean": 0.19413280189037324,
"signal/frontier_coverage_1/group_std_mean": 0.2504078775644302,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.046854938566684726,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027760989032685757,
"signal/frontier_coverage_10/centered_abs_mean": 0.19413280189037324,
"signal/frontier_coverage_10/group_std_mean": 0.2504078775644302,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.046854938566684726,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027760989032685757,
"signal/frontier_coverage_15/centered_abs_mean": 0.19413280189037324,
"signal/frontier_coverage_15/group_std_mean": 0.2504078775644302,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.046854938566684726,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027760989032685757,
"signal/frontier_coverage_20/centered_abs_mean": 0.19413280189037324,
"signal/frontier_coverage_20/group_std_mean": 0.2504078775644302,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.046854938566684726,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027760989032685757,
"signal/frontier_coverage_25/centered_abs_mean": 0.19413280189037324,
"signal/frontier_coverage_25/group_std_mean": 0.2504078775644302,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.046854938566684726,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027760989032685757,
"signal/frontier_coverage_5/centered_abs_mean": 0.19413280189037324,
"signal/frontier_coverage_5/group_std_mean": 0.2504078775644302,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.046854938566684726,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027760989032685757,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2574520826339722,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33517150282859803,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.43392097353935244,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02574520818889141,
"step": 70
},
{
"calibration/aurc": 0.3451851171793069,
"calibration/batch_distribution_entropy": 0.9558716587565168,
"calibration/buffer_distribution_entropy": 0.9895516259326991,
"calibration/confidence_entropy": 0.48006278103461353,
"calibration/coverage@0%": 0.015625,
"calibration/coverage@1%": 0.015625,
"calibration/coverage@10%": 0.15859375,
"calibration/coverage@15%": 0.22265625,
"calibration/coverage@20%": 0.250390625,
"calibration/coverage@25%": 0.266015625,
"calibration/coverage@30%": 0.351171875,
"calibration/coverage@5%": 0.0578125,
"calibration/ece": 0.16036406672020234,
"calibration/mean_confidence": 0.5099695563505072,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 394.2,
"completions/max_terminated_length": 394.2,
"completions/mean_length": 119.859765625,
"completions/mean_terminated_length": 119.87151336669922,
"completions/min_length": 49.6,
"completions/min_terminated_length": 62.6,
"epoch": 0.24,
"grad_norm": 0.014534401707351208,
"learning_rate": 1e-06,
"loss": -0.0062,
"num_tokens": 244774723.0,
"reward": 0.9338017344474793,
"reward_std": 0.09010614305734635,
"rewards/accuracy_reward": 0.5482421875,
"rewards/brier_reward": 0.7745696544647217,
"rewards/confidence_uniqueness_reward": 0.9516302347183228,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.0856390755623579,
"rewards/frontier_coverage_1": 0.0856390755623579,
"rewards/frontier_coverage_10": 0.0856390755623579,
"rewards/frontier_coverage_15": 0.0856390755623579,
"rewards/frontier_coverage_20": 0.0856390755623579,
"rewards/frontier_coverage_25": 0.0856390755623579,
"rewards/frontier_coverage_5": 0.0856390755623579,
"rewards/frontier_entropy_batch_reward": -0.21463005542755126,
"signal/accuracy_reward/centered_abs_mean": 0.1029052734375,
"signal/accuracy_reward/group_std_mean": 0.13757123202085494,
"signal/accuracy_reward/group_zero_std_frac": 0.609375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8875034332275391,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.05145263671875,
"signal/advantage_abs_mean": 0.7700738310813904,
"signal/advantage_pre_scale_abs_mean": 0.06997058242559433,
"signal/advantage_pre_scale_std": 0.10958524942398071,
"signal/advantage_std": 0.9827866315841675,
"signal/brier_reward/centered_abs_mean": 0.13409124910831452,
"signal/brier_reward/group_std_mean": 0.17349932193756104,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23266932964324952,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.013409125059843064,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012924287095665931,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016424901597201825,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02235339842736721,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012924287468194962,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001489312667399645,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.16890641450881957,
"signal/frontier_coverage_0/group_std_mean": 0.22147968411445618,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04203609824180603,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002415361627936363,
"signal/frontier_coverage_1/centered_abs_mean": 0.16890641450881957,
"signal/frontier_coverage_1/group_std_mean": 0.22147968411445618,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04203609824180603,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002415361627936363,
"signal/frontier_coverage_10/centered_abs_mean": 0.16890641450881957,
"signal/frontier_coverage_10/group_std_mean": 0.22147968411445618,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04203609824180603,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002415361627936363,
"signal/frontier_coverage_15/centered_abs_mean": 0.16890641450881957,
"signal/frontier_coverage_15/group_std_mean": 0.22147968411445618,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04203609824180603,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002415361627936363,
"signal/frontier_coverage_20/centered_abs_mean": 0.16890641450881957,
"signal/frontier_coverage_20/group_std_mean": 0.22147968411445618,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04203609824180603,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002415361627936363,
"signal/frontier_coverage_25/centered_abs_mean": 0.16890641450881957,
"signal/frontier_coverage_25/group_std_mean": 0.22147968411445618,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04203609824180603,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002415361627936363,
"signal/frontier_coverage_5/centered_abs_mean": 0.16890641450881957,
"signal/frontier_coverage_5/group_std_mean": 0.22147968411445618,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04203609824180603,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002415361627936363,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2731468856334686,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3471518874168396,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47121843695640564,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02731468863785267,
"step": 75
},
{
"calibration/aurc": 0.2380899878689502,
"calibration/batch_distribution_entropy": 0.9543793623709981,
"calibration/buffer_distribution_entropy": 0.9904958080647177,
"calibration/confidence_entropy": 0.4724079818447585,
"calibration/coverage@0%": 0.038671875,
"calibration/coverage@1%": 0.038671875,
"calibration/coverage@10%": 0.319921875,
"calibration/coverage@15%": 0.382421875,
"calibration/coverage@20%": 0.448828125,
"calibration/coverage@25%": 0.51875,
"calibration/coverage@30%": 0.60625,
"calibration/coverage@5%": 0.196875,
"calibration/ece": 0.14697595971741417,
"calibration/mean_confidence": 0.5045763309062191,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 375.0,
"completions/max_terminated_length": 375.0,
"completions/mean_length": 130.82294921875,
"completions/mean_terminated_length": 130.8369934082031,
"completions/min_length": 51.0,
"completions/min_terminated_length": 64.2,
"epoch": 0.256,
"grad_norm": 0.011844088323414326,
"learning_rate": 1e-06,
"loss": -0.0062,
"num_tokens": 261169166.0,
"reward": 0.9297836661338806,
"reward_std": 0.08369718790054322,
"rewards/accuracy_reward": 0.54404296875,
"rewards/brier_reward": 0.7865662574768066,
"rewards/confidence_uniqueness_reward": 0.9491964101791381,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.10523570999503136,
"rewards/frontier_coverage_1": 0.10523570999503136,
"rewards/frontier_coverage_10": 0.10523570999503136,
"rewards/frontier_coverage_15": 0.10523570999503136,
"rewards/frontier_coverage_20": 0.10523570999503136,
"rewards/frontier_coverage_25": 0.10523570999503136,
"rewards/frontier_coverage_5": 0.10523570999503136,
"rewards/frontier_entropy_batch_reward": -0.2629933536052704,
"signal/accuracy_reward/centered_abs_mean": 0.088494873046875,
"signal/accuracy_reward/group_std_mean": 0.12247141897678375,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7810031771659851,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0442474365234375,
"signal/advantage_abs_mean": 0.7776289224624634,
"signal/advantage_pre_scale_abs_mean": 0.0648388609290123,
"signal/advantage_pre_scale_std": 0.1018882930278778,
"signal/advantage_std": 0.9827472686767578,
"signal/brier_reward/centered_abs_mean": 0.11603698432445526,
"signal/brier_reward/group_std_mean": 0.1506718337535858,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.204732221364975,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011603698506951332,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014407002925872802,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018257852271199228,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025521285086870193,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001440700376406312,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0017066342756152154,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.14679024815559388,
"signal/frontier_coverage_0/group_std_mean": 0.1926429718732834,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03715235441923141,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020991005701944234,
"signal/frontier_coverage_1/centered_abs_mean": 0.14679024815559388,
"signal/frontier_coverage_1/group_std_mean": 0.1926429718732834,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03715235441923141,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020991005701944234,
"signal/frontier_coverage_10/centered_abs_mean": 0.14679024815559388,
"signal/frontier_coverage_10/group_std_mean": 0.1926429718732834,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03715235441923141,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020991005701944234,
"signal/frontier_coverage_15/centered_abs_mean": 0.14679024815559388,
"signal/frontier_coverage_15/group_std_mean": 0.1926429718732834,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03715235441923141,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020991005701944234,
"signal/frontier_coverage_20/centered_abs_mean": 0.14679024815559388,
"signal/frontier_coverage_20/group_std_mean": 0.1926429718732834,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03715235441923141,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020991005701944234,
"signal/frontier_coverage_25/centered_abs_mean": 0.14679024815559388,
"signal/frontier_coverage_25/group_std_mean": 0.1926429718732834,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03715235441923141,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020991005701944234,
"signal/frontier_coverage_5/centered_abs_mean": 0.14679024815559388,
"signal/frontier_coverage_5/group_std_mean": 0.1926429718732834,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03715235441923141,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020991005701944234,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3055807054042816,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3773996353149414,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5410493850708008,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030558070167899132,
"step": 80
},
{
"calibration/aurc": 0.3613767694028809,
"calibration/batch_distribution_entropy": 0.9694824840031633,
"calibration/buffer_distribution_entropy": 0.9915738867672383,
"calibration/confidence_entropy": 0.5049346154650323,
"calibration/coverage@0%": 0.030859375,
"calibration/coverage@1%": 0.030859375,
"calibration/coverage@10%": 0.1234375,
"calibration/coverage@15%": 0.1546875,
"calibration/coverage@20%": 0.27734375,
"calibration/coverage@25%": 0.353125,
"calibration/coverage@30%": 0.4,
"calibration/coverage@5%": 0.061328125,
"calibration/ece": 0.1336495954580236,
"calibration/mean_confidence": 0.46672275323620094,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 450.0,
"completions/max_terminated_length": 450.0,
"completions/mean_length": 153.59951171875,
"completions/mean_terminated_length": 153.61471862792968,
"completions/min_length": 60.8,
"completions/min_terminated_length": 77.8,
"epoch": 0.272,
"grad_norm": 0.011426495388150215,
"learning_rate": 1e-06,
"loss": -0.0031,
"num_tokens": 277707721.0,
"reward": 0.9228897452354431,
"reward_std": 0.08293161988258362,
"rewards/accuracy_reward": 0.51865234375,
"rewards/brier_reward": 0.788739800453186,
"rewards/confidence_uniqueness_reward": 0.9513486862182617,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.12113445177674294,
"rewards/frontier_coverage_1": 0.12113445177674294,
"rewards/frontier_coverage_10": 0.12113445177674294,
"rewards/frontier_coverage_15": 0.12113445177674294,
"rewards/frontier_coverage_20": 0.12113445177674294,
"rewards/frontier_coverage_25": 0.12113445177674294,
"rewards/frontier_coverage_5": 0.12113445177674294,
"rewards/frontier_entropy_batch_reward": -0.22522012591362,
"signal/accuracy_reward/centered_abs_mean": 0.101690673828125,
"signal/accuracy_reward/group_std_mean": 0.13314552009105682,
"signal/accuracy_reward/group_zero_std_frac": 0.628125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9522302627563477,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0508453369140625,
"signal/advantage_abs_mean": 0.7683973193168641,
"signal/advantage_pre_scale_abs_mean": 0.06439381539821624,
"signal/advantage_pre_scale_std": 0.10144704878330231,
"signal/advantage_std": 0.9826340436935425,
"signal/brier_reward/centered_abs_mean": 0.11361265182495117,
"signal/brier_reward/group_std_mean": 0.1463920384645462,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2135068655014038,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011361265368759633,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012319310754537582,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015735189616680145,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023272840678691863,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012319311266765,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018666807562112808,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.17033348679542543,
"signal/frontier_coverage_0/group_std_mean": 0.21856584250926972,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04595231339335441,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002435768861323595,
"signal/frontier_coverage_1/centered_abs_mean": 0.17033348679542543,
"signal/frontier_coverage_1/group_std_mean": 0.21856584250926972,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04595231339335441,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002435768861323595,
"signal/frontier_coverage_10/centered_abs_mean": 0.17033348679542543,
"signal/frontier_coverage_10/group_std_mean": 0.21856584250926972,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04595231339335441,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002435768861323595,
"signal/frontier_coverage_15/centered_abs_mean": 0.17033348679542543,
"signal/frontier_coverage_15/group_std_mean": 0.21856584250926972,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04595231339335441,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002435768861323595,
"signal/frontier_coverage_20/centered_abs_mean": 0.17033348679542543,
"signal/frontier_coverage_20/group_std_mean": 0.21856584250926972,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04595231339335441,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002435768861323595,
"signal/frontier_coverage_25/centered_abs_mean": 0.17033348679542543,
"signal/frontier_coverage_25/group_std_mean": 0.21856584250926972,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04595231339335441,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002435768861323595,
"signal/frontier_coverage_5/centered_abs_mean": 0.17033348679542543,
"signal/frontier_coverage_5/group_std_mean": 0.21856584250926972,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04595231339335441,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002435768861323595,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2705967366695404,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34300823211669923,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.510302847623825,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027059673517942428,
"step": 85
},
{
"calibration/aurc": 0.27859286701221453,
"calibration/batch_distribution_entropy": 0.970638670654175,
"calibration/buffer_distribution_entropy": 0.9927677427137654,
"calibration/confidence_entropy": 0.478569513861055,
"calibration/coverage@0%": 0.022666187622309198,
"calibration/coverage@1%": 0.022666187622309198,
"calibration/coverage@10%": 0.15825892857142856,
"calibration/coverage@15%": 0.22705173679060664,
"calibration/coverage@20%": 0.27435099681996084,
"calibration/coverage@25%": 0.3427592954990215,
"calibration/coverage@30%": 0.4955708781800391,
"calibration/coverage@5%": 0.0640724376223092,
"calibration/ece": 0.12627269961365964,
"calibration/mean_confidence": 0.5429138799075158,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 443.8,
"completions/max_terminated_length": 443.8,
"completions/mean_length": 161.62763671875,
"completions/mean_terminated_length": 161.62763671875,
"completions/min_length": 79.2,
"completions/min_terminated_length": 79.2,
"epoch": 0.288,
"grad_norm": 0.011343343183398247,
"learning_rate": 1e-06,
"loss": -0.0037,
"num_tokens": 294320964.0,
"reward": 0.9370681405067444,
"reward_std": 0.0841323509812355,
"rewards/accuracy_reward": 0.55107421875,
"rewards/brier_reward": 0.790608286857605,
"rewards/confidence_uniqueness_reward": 0.9515504360198974,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.10305131077766419,
"rewards/frontier_coverage_1": 0.10305131077766419,
"rewards/frontier_coverage_10": 0.10305131077766419,
"rewards/frontier_coverage_15": 0.10305131077766419,
"rewards/frontier_coverage_20": 0.10305131077766419,
"rewards/frontier_coverage_25": 0.10305131077766419,
"rewards/frontier_coverage_5": 0.10305131077766419,
"rewards/frontier_entropy_batch_reward": -0.22902617156505584,
"signal/accuracy_reward/centered_abs_mean": 0.098126220703125,
"signal/accuracy_reward/group_std_mean": 0.13257125914096832,
"signal/accuracy_reward/group_zero_std_frac": 0.60625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8775075793266296,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0490631103515625,
"signal/advantage_abs_mean": 0.7683995246887207,
"signal/advantage_pre_scale_abs_mean": 0.06486314833164215,
"signal/advantage_pre_scale_std": 0.10212174206972122,
"signal/advantage_std": 0.9827188014984131,
"signal/brier_reward/centered_abs_mean": 0.11409407407045365,
"signal/brier_reward/group_std_mean": 0.14835602343082427,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20434542298316954,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011409407667815685,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012881199643015862,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016685626842081545,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02331661656498909,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001288120006211102,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003475642204284668,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.15965526700019836,
"signal/frontier_coverage_0/group_std_mean": 0.20462646484375,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04117161184549332,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022830702364444733,
"signal/frontier_coverage_1/centered_abs_mean": 0.15965526700019836,
"signal/frontier_coverage_1/group_std_mean": 0.20462646484375,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04117161184549332,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022830702364444733,
"signal/frontier_coverage_10/centered_abs_mean": 0.15965526700019836,
"signal/frontier_coverage_10/group_std_mean": 0.20462646484375,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04117161184549332,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022830702364444733,
"signal/frontier_coverage_15/centered_abs_mean": 0.15965526700019836,
"signal/frontier_coverage_15/group_std_mean": 0.20462646484375,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04117161184549332,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022830702364444733,
"signal/frontier_coverage_20/centered_abs_mean": 0.15965526700019836,
"signal/frontier_coverage_20/group_std_mean": 0.20462646484375,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04117161184549332,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022830702364444733,
"signal/frontier_coverage_25/centered_abs_mean": 0.15965526700019836,
"signal/frontier_coverage_25/group_std_mean": 0.20462646484375,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04117161184549332,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022830702364444733,
"signal/frontier_coverage_5/centered_abs_mean": 0.15965526700019836,
"signal/frontier_coverage_5/group_std_mean": 0.20462646484375,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04117161184549332,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022830702364444733,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28002009987831117,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3523731052875519,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5039195537567138,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02800200991332531,
"step": 90
},
{
"calibration/aurc": 0.27400511315915477,
"calibration/batch_distribution_entropy": 0.9699587413307474,
"calibration/buffer_distribution_entropy": 0.9931079326030978,
"calibration/confidence_entropy": 0.4935722923864724,
"calibration/coverage@0%": 0.023046875,
"calibration/coverage@1%": 0.023046875,
"calibration/coverage@10%": 0.130078125,
"calibration/coverage@15%": 0.20546875,
"calibration/coverage@20%": 0.308984375,
"calibration/coverage@25%": 0.5109375,
"calibration/coverage@30%": 0.6390625,
"calibration/coverage@5%": 0.052734375,
"calibration/ece": 0.08987905700365367,
"calibration/mean_confidence": 0.5164384378723209,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 547.6,
"completions/max_terminated_length": 547.6,
"completions/mean_length": 181.79541015625,
"completions/mean_terminated_length": 181.84756164550782,
"completions/min_length": 38.0,
"completions/min_terminated_length": 92.0,
"epoch": 0.304,
"grad_norm": 0.009509476833045483,
"learning_rate": 1e-06,
"loss": -0.0015,
"num_tokens": 311112501.0,
"reward": 0.926408588886261,
"reward_std": 0.08505538254976272,
"rewards/accuracy_reward": 0.524609375,
"rewards/brier_reward": 0.7812718510627746,
"rewards/confidence_uniqueness_reward": 0.9513449430465698,
"rewards/format_reward": 0.999609375,
"rewards/frontier_coverage_0": 0.11590675860643387,
"rewards/frontier_coverage_1": 0.11590675860643387,
"rewards/frontier_coverage_10": 0.11590675860643387,
"rewards/frontier_coverage_15": 0.11590675860643387,
"rewards/frontier_coverage_20": 0.11590675860643387,
"rewards/frontier_coverage_25": 0.11590675860643387,
"rewards/frontier_coverage_5": 0.11590675860643387,
"rewards/frontier_entropy_batch_reward": -0.20564735531806946,
"signal/accuracy_reward/centered_abs_mean": 0.10882568359375,
"signal/accuracy_reward/group_std_mean": 0.14444092959165572,
"signal/accuracy_reward/group_zero_std_frac": 0.58125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.974223279953003,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.054412841796875,
"signal/advantage_abs_mean": 0.7569803476333619,
"signal/advantage_pre_scale_abs_mean": 0.06511303558945655,
"signal/advantage_pre_scale_std": 0.10234367698431016,
"signal/advantage_std": 0.9826973438262939,
"signal/brier_reward/centered_abs_mean": 0.11599338501691818,
"signal/brier_reward/group_std_mean": 0.14832000136375428,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20899596214294433,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.011599338613450527,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01347355991601944,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018178258277475833,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024797194078564642,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013473560102283955,
"signal/format_reward/centered_abs_mean": 0.0007568359375,
"signal/format_reward/group_std_mean": 0.0022097086533904076,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007120777480304241,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.00037841796875,
"signal/frontier_coverage_0/centered_abs_mean": 0.1757221668958664,
"signal/frontier_coverage_0/group_std_mean": 0.22423993349075316,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04559025391936302,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002512827096506953,
"signal/frontier_coverage_1/centered_abs_mean": 0.1757221668958664,
"signal/frontier_coverage_1/group_std_mean": 0.22423993349075316,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04559025391936302,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002512827096506953,
"signal/frontier_coverage_10/centered_abs_mean": 0.1757221668958664,
"signal/frontier_coverage_10/group_std_mean": 0.22423993349075316,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04559025391936302,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002512827096506953,
"signal/frontier_coverage_15/centered_abs_mean": 0.1757221668958664,
"signal/frontier_coverage_15/group_std_mean": 0.22423993349075316,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04559025391936302,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002512827096506953,
"signal/frontier_coverage_20/centered_abs_mean": 0.1757221668958664,
"signal/frontier_coverage_20/group_std_mean": 0.22423993349075316,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04559025391936302,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002512827096506953,
"signal/frontier_coverage_25/centered_abs_mean": 0.1757221668958664,
"signal/frontier_coverage_25/group_std_mean": 0.22423993349075316,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04559025391936302,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002512827096506953,
"signal/frontier_coverage_5/centered_abs_mean": 0.1757221668958664,
"signal/frontier_coverage_5/group_std_mean": 0.22423993349075316,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04559025391936302,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002512827096506953,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26609655022621154,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3424116730690002,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4856810808181763,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02660965621471405,
"step": 95
},
{
"calibration/aurc": 0.20195095907390134,
"calibration/batch_distribution_entropy": 0.9734814252165089,
"calibration/buffer_distribution_entropy": 0.9936861512598052,
"calibration/confidence_entropy": 0.4821587175226608,
"calibration/coverage@0%": 0.1,
"calibration/coverage@1%": 0.130859375,
"calibration/coverage@10%": 0.361328125,
"calibration/coverage@15%": 0.449609375,
"calibration/coverage@20%": 0.55546875,
"calibration/coverage@25%": 0.678515625,
"calibration/coverage@30%": 0.750390625,
"calibration/coverage@5%": 0.287109375,
"calibration/ece": 0.145156608944858,
"calibration/mean_confidence": 0.5455970664283126,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 594.6,
"completions/max_terminated_length": 594.6,
"completions/mean_length": 192.10126953125,
"completions/mean_terminated_length": 192.21360473632814,
"completions/min_length": 39.0,
"completions/min_terminated_length": 82.6,
"epoch": 0.32,
"grad_norm": 0.01124265231192112,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 328168322.0,
"reward": 0.938688862323761,
"reward_std": 0.07619319260120391,
"rewards/accuracy_reward": 0.54619140625,
"rewards/brier_reward": 0.8016320586204528,
"rewards/confidence_uniqueness_reward": 0.951561689376831,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_coverage_0": 0.11333505995571613,
"rewards/frontier_coverage_1": 0.11333505995571613,
"rewards/frontier_coverage_10": 0.11333505995571613,
"rewards/frontier_coverage_15": 0.11333505995571613,
"rewards/frontier_coverage_20": 0.11333505995571613,
"rewards/frontier_coverage_25": 0.11333505995571613,
"rewards/frontier_coverage_5": 0.11333505995571613,
"rewards/frontier_entropy_batch_reward": -0.20729252099990844,
"signal/accuracy_reward/centered_abs_mean": 0.083489990234375,
"signal/accuracy_reward/group_std_mean": 0.11030419915914536,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8474804639816285,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0417449951171875,
"signal/advantage_abs_mean": 0.7693067193031311,
"signal/advantage_pre_scale_abs_mean": 0.058793623745441434,
"signal/advantage_pre_scale_std": 0.09535037130117416,
"signal/advantage_std": 0.982493007183075,
"signal/brier_reward/centered_abs_mean": 0.09994795173406601,
"signal/brier_reward/group_std_mean": 0.13043854236602784,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20334738492965698,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0099947951734066,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013709683902561665,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018594534881412984,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027801194787025453,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013709683902561665,
"signal/format_reward/centered_abs_mean": 0.001300048828125,
"signal/format_reward/group_std_mean": 0.003194373194128275,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.013123654946684837,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625,
"signal/frontier_coverage_0/centered_abs_mean": 0.1436137169599533,
"signal/frontier_coverage_0/group_std_mean": 0.18481407761573793,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041797750443220136,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002053676126524806,
"signal/frontier_coverage_1/centered_abs_mean": 0.1436137169599533,
"signal/frontier_coverage_1/group_std_mean": 0.18481407761573793,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041797750443220136,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002053676126524806,
"signal/frontier_coverage_10/centered_abs_mean": 0.1436137169599533,
"signal/frontier_coverage_10/group_std_mean": 0.18481407761573793,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.041797750443220136,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002053676126524806,
"signal/frontier_coverage_15/centered_abs_mean": 0.1436137169599533,
"signal/frontier_coverage_15/group_std_mean": 0.18481407761573793,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.041797750443220136,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002053676126524806,
"signal/frontier_coverage_20/centered_abs_mean": 0.1436137169599533,
"signal/frontier_coverage_20/group_std_mean": 0.18481407761573793,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.041797750443220136,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002053676126524806,
"signal/frontier_coverage_25/centered_abs_mean": 0.1436137169599533,
"signal/frontier_coverage_25/group_std_mean": 0.18481407761573793,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.041797750443220136,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002053676126524806,
"signal/frontier_coverage_5/centered_abs_mean": 0.1436137169599533,
"signal/frontier_coverage_5/group_std_mean": 0.18481407761573793,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041797750443220136,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002053676126524806,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2712838649749756,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3431327760219574,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5484427690505982,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02712838724255562,
"step": 100
},
{
"epoch": 0.32,
"eval_calibration/aurc": 0.4838447164186258,
"eval_calibration/batch_distribution_entropy": 0.9087856722466727,
"eval_calibration/buffer_distribution_entropy": 0.9937493854082251,
"eval_calibration/confidence_entropy": 0.48006873213412093,
"eval_calibration/coverage@0%": 0.0625,
"eval_calibration/coverage@1%": 0.0625,
"eval_calibration/coverage@10%": 0.0625,
"eval_calibration/coverage@15%": 0.0625,
"eval_calibration/coverage@20%": 0.0859375,
"eval_calibration/coverage@25%": 0.1953125,
"eval_calibration/coverage@30%": 0.2265625,
"eval_calibration/coverage@5%": 0.0625,
"eval_calibration/ece": 0.19939102374750206,
"eval_calibration/mean_confidence": 0.4764055658567398,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 371.0,
"eval_completions/max_terminated_length": 371.0,
"eval_completions/mean_length": 199.60984802246094,
"eval_completions/mean_terminated_length": 199.60984802246094,
"eval_completions/min_length": 117.75,
"eval_completions/min_terminated_length": 117.75,
"eval_loss": 0.0,
"eval_num_tokens": 328168322.0,
"eval_reward": 0.795561820268631,
"eval_reward_std": 0.22068889066576958,
"eval_rewards/accuracy_reward": 0.408203125,
"eval_rewards/brier_reward": 0.8055337518453598,
"eval_rewards/confidence_uniqueness_reward": 0.900390625,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_coverage_0": 0.2084694728255272,
"eval_rewards/frontier_coverage_1": 0.2084694728255272,
"eval_rewards/frontier_coverage_10": 0.2084694728255272,
"eval_rewards/frontier_coverage_15": 0.2084694728255272,
"eval_rewards/frontier_coverage_20": 0.2084694728255272,
"eval_rewards/frontier_coverage_25": 0.2084694728255272,
"eval_rewards/frontier_coverage_5": 0.2084694728255272,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 20.2095,
"eval_samples_per_second": 24.741,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4696044921875,
"eval_signal/accuracy_reward/group_std_mean": 0.4919809103012085,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0656675398349762,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23480224609375,
"eval_signal/advantage_abs_mean": 0.9272723495960236,
"eval_signal/advantage_pre_scale_abs_mean": 0.2049938254058361,
"eval_signal/advantage_pre_scale_std": 0.21827252581715584,
"eval_signal/advantage_std": 0.9876697510480881,
"eval_signal/brier_reward/centered_abs_mean": 0.18734565749764442,
"eval_signal/brier_reward/group_std_mean": 0.23995699733495712,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08503718301653862,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01873456547036767,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037811279296875,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.044189696200191975,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017147937789559364,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037811279762536287,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.35348744690418243,
"eval_signal/frontier_coverage_0/group_std_mean": 0.42605313658714294,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.022959773894399405,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005054870503954589,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.35348744690418243,
"eval_signal/frontier_coverage_1/group_std_mean": 0.42605313658714294,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.022959773894399405,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005054870503954589,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.35348744690418243,
"eval_signal/frontier_coverage_10/group_std_mean": 0.42605313658714294,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.022959773894399405,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005054870503954589,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.35348744690418243,
"eval_signal/frontier_coverage_15/group_std_mean": 0.42605313658714294,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.022959773894399405,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005054870503954589,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.35348744690418243,
"eval_signal/frontier_coverage_20/group_std_mean": 0.42605313658714294,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022959773894399405,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005054870503954589,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.35348744690418243,
"eval_signal/frontier_coverage_25/group_std_mean": 0.42605313658714294,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022959773894399405,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005054870503954589,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.35348744690418243,
"eval_signal/frontier_coverage_5/group_std_mean": 0.42605313658714294,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.022959773894399405,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005054870503954589,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.198,
"step": 100
},
{
"calibration/aurc": 0.26602253089668076,
"calibration/batch_distribution_entropy": 0.9677981082899623,
"calibration/buffer_distribution_entropy": 0.9949084560096411,
"calibration/confidence_entropy": 0.49570299589735656,
"calibration/coverage@0%": 0.02149278375733855,
"calibration/coverage@1%": 0.02149278375733855,
"calibration/coverage@10%": 0.1090508806262231,
"calibration/coverage@15%": 0.18800605430528378,
"calibration/coverage@20%": 0.29751406555772997,
"calibration/coverage@25%": 0.5710066046966732,
"calibration/coverage@30%": 0.7069708598336595,
"calibration/coverage@5%": 0.023055283757338552,
"calibration/ece": 0.11665484961072996,
"calibration/mean_confidence": 0.5079856328555454,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 564.6,
"completions/max_terminated_length": 564.6,
"completions/mean_length": 196.78076171875,
"completions/mean_terminated_length": 196.8188690185547,
"completions/min_length": 61.0,
"completions/min_terminated_length": 100.4,
"epoch": 0.336,
"grad_norm": 0.008793617598712444,
"learning_rate": 1e-06,
"loss": 0.0022,
"num_tokens": 344905789.0,
"reward": 0.9403074264526368,
"reward_std": 0.07877994924783707,
"rewards/accuracy_reward": 0.55146484375,
"rewards/brier_reward": 0.8012589335441589,
"rewards/confidence_uniqueness_reward": 0.9518896460533142,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.10983360260725021,
"rewards/frontier_coverage_1": 0.10983360260725021,
"rewards/frontier_coverage_10": 0.10983360260725021,
"rewards/frontier_coverage_15": 0.10983360260725021,
"rewards/frontier_coverage_20": 0.10983360260725021,
"rewards/frontier_coverage_25": 0.10983360260725021,
"rewards/frontier_coverage_5": 0.10983360260725021,
"rewards/frontier_entropy_batch_reward": -0.21636542975902556,
"signal/accuracy_reward/centered_abs_mean": 0.087689208984375,
"signal/accuracy_reward/group_std_mean": 0.11891601234674454,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.863071084022522,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0438446044921875,
"signal/advantage_abs_mean": 0.7660103678703308,
"signal/advantage_pre_scale_abs_mean": 0.060546606034040454,
"signal/advantage_pre_scale_std": 0.0978748396039009,
"signal/advantage_std": 0.9825198411941528,
"signal/brier_reward/centered_abs_mean": 0.10040059238672257,
"signal/brier_reward/group_std_mean": 0.1297599822282791,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20032111704349517,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010040059126913548,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012283951044082642,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015965880826115608,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02450355812907219,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012283950811251998,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0036628665402531624,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.14154843986034393,
"signal/frontier_coverage_0/group_std_mean": 0.18509421646595,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0403674952685833,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020241427468135954,
"signal/frontier_coverage_1/centered_abs_mean": 0.14154843986034393,
"signal/frontier_coverage_1/group_std_mean": 0.18509421646595,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0403674952685833,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020241427468135954,
"signal/frontier_coverage_10/centered_abs_mean": 0.14154843986034393,
"signal/frontier_coverage_10/group_std_mean": 0.18509421646595,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0403674952685833,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020241427468135954,
"signal/frontier_coverage_15/centered_abs_mean": 0.14154843986034393,
"signal/frontier_coverage_15/group_std_mean": 0.18509421646595,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0403674952685833,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020241427468135954,
"signal/frontier_coverage_20/centered_abs_mean": 0.14154843986034393,
"signal/frontier_coverage_20/group_std_mean": 0.18509421646595,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0403674952685833,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020241427468135954,
"signal/frontier_coverage_25/centered_abs_mean": 0.14154843986034393,
"signal/frontier_coverage_25/group_std_mean": 0.18509421646595,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0403674952685833,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020241427468135954,
"signal/frontier_coverage_5/centered_abs_mean": 0.14154843986034393,
"signal/frontier_coverage_5/group_std_mean": 0.18509421646595,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0403674952685833,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020241427468135954,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2685790777206421,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3413450360298157,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5357294917106629,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026857908815145493,
"step": 105
},
{
"calibration/aurc": 0.25054426640565053,
"calibration/batch_distribution_entropy": 0.9506281697531032,
"calibration/buffer_distribution_entropy": 0.9976023879596688,
"calibration/confidence_entropy": 0.43937590127320797,
"calibration/coverage@0%": 0.118359375,
"calibration/coverage@1%": 0.130859375,
"calibration/coverage@10%": 0.30546875,
"calibration/coverage@15%": 0.440625,
"calibration/coverage@20%": 0.505078125,
"calibration/coverage@25%": 0.551171875,
"calibration/coverage@30%": 0.619140625,
"calibration/coverage@5%": 0.195703125,
"calibration/ece": 0.11887008576271958,
"calibration/mean_confidence": 0.44458669144123214,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 562.2,
"completions/max_terminated_length": 562.2,
"completions/mean_length": 201.9111328125,
"completions/mean_terminated_length": 201.95140380859374,
"completions/min_length": 61.0,
"completions/min_terminated_length": 101.8,
"epoch": 0.352,
"grad_norm": 0.01325867511332035,
"learning_rate": 1e-06,
"loss": -0.0013,
"num_tokens": 362233775.0,
"reward": 0.9122628808021546,
"reward_std": 0.0789569452404976,
"rewards/accuracy_reward": 0.483984375,
"rewards/brier_reward": 0.8145543575286865,
"rewards/confidence_uniqueness_reward": 0.9494835615158081,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_coverage_0": 0.17774035930633544,
"rewards/frontier_coverage_1": 0.17774035930633544,
"rewards/frontier_coverage_10": 0.17774035930633544,
"rewards/frontier_coverage_15": 0.17774035930633544,
"rewards/frontier_coverage_20": 0.17774035930633544,
"rewards/frontier_coverage_25": 0.17774035930633544,
"rewards/frontier_coverage_5": 0.17774035930633544,
"rewards/frontier_entropy_batch_reward": -0.23778423070907592,
"signal/accuracy_reward/centered_abs_mean": 0.09801025390625,
"signal/accuracy_reward/group_std_mean": 0.12496584504842759,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0013089537620545,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.049005126953125,
"signal/advantage_abs_mean": 0.7665701508522034,
"signal/advantage_pre_scale_abs_mean": 0.06204437762498856,
"signal/advantage_pre_scale_std": 0.09929190725088119,
"signal/advantage_std": 0.9824689507484436,
"signal/brier_reward/centered_abs_mean": 0.10055015981197357,
"signal/brier_reward/group_std_mean": 0.1297568753361702,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20597705543041228,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010055016353726387,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013859933055937291,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018241026997566225,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028573965653777122,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013859933242201805,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006001142412424087,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_coverage_0/centered_abs_mean": 0.1629865735769272,
"signal/frontier_coverage_0/group_std_mean": 0.20776084065437317,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0477764330804348,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023307080380618574,
"signal/frontier_coverage_1/centered_abs_mean": 0.1629865735769272,
"signal/frontier_coverage_1/group_std_mean": 0.20776084065437317,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0477764330804348,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023307080380618574,
"signal/frontier_coverage_10/centered_abs_mean": 0.1629865735769272,
"signal/frontier_coverage_10/group_std_mean": 0.20776084065437317,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0477764330804348,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023307080380618574,
"signal/frontier_coverage_15/centered_abs_mean": 0.1629865735769272,
"signal/frontier_coverage_15/group_std_mean": 0.20776084065437317,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0477764330804348,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023307080380618574,
"signal/frontier_coverage_20/centered_abs_mean": 0.1629865735769272,
"signal/frontier_coverage_20/group_std_mean": 0.20776084065437317,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0477764330804348,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023307080380618574,
"signal/frontier_coverage_25/centered_abs_mean": 0.1629865735769272,
"signal/frontier_coverage_25/group_std_mean": 0.20776084065437317,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0477764330804348,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023307080380618574,
"signal/frontier_coverage_5/centered_abs_mean": 0.1629865735769272,
"signal/frontier_coverage_5/group_std_mean": 0.20776084065437317,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0477764330804348,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023307080380618574,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26890730261802676,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34364842176437377,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5524909615516662,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02689073123037815,
"step": 110
},
{
"calibration/aurc": 0.3163110406084484,
"calibration/batch_distribution_entropy": 0.9615172344168172,
"calibration/buffer_distribution_entropy": 0.9982005314375307,
"calibration/confidence_entropy": 0.4628636321817998,
"calibration/coverage@0%": 0.0390625,
"calibration/coverage@1%": 0.0390625,
"calibration/coverage@10%": 0.1140625,
"calibration/coverage@15%": 0.169140625,
"calibration/coverage@20%": 0.350390625,
"calibration/coverage@25%": 0.41796875,
"calibration/coverage@30%": 0.521484375,
"calibration/coverage@5%": 0.073046875,
"calibration/ece": 0.1247380959840438,
"calibration/mean_confidence": 0.507100053732364,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 588.6,
"completions/max_terminated_length": 588.6,
"completions/mean_length": 209.62236328125,
"completions/mean_terminated_length": 209.66268310546874,
"completions/min_length": 64.4,
"completions/min_terminated_length": 105.6,
"epoch": 0.368,
"grad_norm": 0.01056207250803709,
"learning_rate": 1e-06,
"loss": -0.0015,
"num_tokens": 379445780.0,
"reward": 0.9280990719795227,
"reward_std": 0.07095708847045898,
"rewards/accuracy_reward": 0.52216796875,
"rewards/brier_reward": 0.8045044064521789,
"rewards/confidence_uniqueness_reward": 0.950655996799469,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_coverage_0": 0.1437540665268898,
"rewards/frontier_coverage_1": 0.1437540665268898,
"rewards/frontier_coverage_10": 0.1437540665268898,
"rewards/frontier_coverage_15": 0.1437540665268898,
"rewards/frontier_coverage_20": 0.1437540665268898,
"rewards/frontier_coverage_25": 0.14409504383802413,
"rewards/frontier_coverage_5": 0.1437540665268898,
"rewards/frontier_entropy_batch_reward": -0.22749127745628356,
"signal/accuracy_reward/centered_abs_mean": 0.073712158203125,
"signal/accuracy_reward/group_std_mean": 0.09910732954740524,
"signal/accuracy_reward/group_zero_std_frac": 0.709375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7912806749343873,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0368560791015625,
"signal/advantage_abs_mean": 0.772281539440155,
"signal/advantage_pre_scale_abs_mean": 0.05444162786006927,
"signal/advantage_pre_scale_std": 0.0894496574997902,
"signal/advantage_std": 0.9823481917381287,
"signal/brier_reward/centered_abs_mean": 0.09946328550577163,
"signal/brier_reward/group_std_mean": 0.12772661596536636,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21652222871780397,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009946328960359097,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013414673879742623,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017754827439785004,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029049182683229445,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013414673740044236,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.006210983730852604,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_coverage_0/centered_abs_mean": 0.14087859094142913,
"signal/frontier_coverage_0/group_std_mean": 0.18012421131134032,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04363641962409019,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002014563884586096,
"signal/frontier_coverage_1/centered_abs_mean": 0.14087859094142913,
"signal/frontier_coverage_1/group_std_mean": 0.18012421131134032,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04363641962409019,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002014563884586096,
"signal/frontier_coverage_10/centered_abs_mean": 0.14087859094142913,
"signal/frontier_coverage_10/group_std_mean": 0.18012421131134032,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04363641962409019,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002014563884586096,
"signal/frontier_coverage_15/centered_abs_mean": 0.14087859094142913,
"signal/frontier_coverage_15/group_std_mean": 0.18012421131134032,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04363641962409019,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002014563884586096,
"signal/frontier_coverage_20/centered_abs_mean": 0.14087859094142913,
"signal/frontier_coverage_20/group_std_mean": 0.18012421131134032,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04363641962409019,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002014563884586096,
"signal/frontier_coverage_25/centered_abs_mean": 0.14017903208732604,
"signal/frontier_coverage_25/group_std_mean": 0.17928515374660492,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0433915801346302,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020045602228492498,
"signal/frontier_coverage_5/centered_abs_mean": 0.14087859094142913,
"signal/frontier_coverage_5/group_std_mean": 0.18012421131134032,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04363641962409019,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002014563884586096,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2679985582828522,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34032227396965026,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5820306539535522,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026799855381250383,
"step": 115
},
{
"calibration/aurc": 0.291872629605271,
"calibration/batch_distribution_entropy": 0.9695508342320689,
"calibration/buffer_distribution_entropy": 0.9974123973699189,
"calibration/confidence_entropy": 0.470467238089471,
"calibration/coverage@0%": 0.108984375,
"calibration/coverage@1%": 0.179296875,
"calibration/coverage@10%": 0.296484375,
"calibration/coverage@15%": 0.32578125,
"calibration/coverage@20%": 0.3609375,
"calibration/coverage@25%": 0.400390625,
"calibration/coverage@30%": 0.451953125,
"calibration/coverage@5%": 0.241015625,
"calibration/ece": 0.16722453167166276,
"calibration/mean_confidence": 0.4628005829226116,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 796.8,
"completions/max_terminated_length": 796.8,
"completions/mean_length": 212.99912109375,
"completions/mean_terminated_length": 213.1254455566406,
"completions/min_length": 22.4,
"completions/min_terminated_length": 101.4,
"epoch": 0.384,
"grad_norm": 0.01185943465679884,
"learning_rate": 1e-06,
"loss": 0.0008,
"num_tokens": 396483403.0,
"reward": 0.9401529908180237,
"reward_std": 0.07879967391490936,
"rewards/accuracy_reward": 0.55224609375,
"rewards/brier_reward": 0.8110327959060669,
"rewards/confidence_uniqueness_reward": 0.9498059153556824,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_coverage_0": 0.12273619621992111,
"rewards/frontier_coverage_1": 0.12273619621992111,
"rewards/frontier_coverage_10": 0.12273619621992111,
"rewards/frontier_coverage_15": 0.12273619621992111,
"rewards/frontier_coverage_20": 0.12227635085582733,
"rewards/frontier_coverage_25": 0.11141111701726913,
"rewards/frontier_coverage_5": 0.12273619621992111,
"rewards/frontier_entropy_batch_reward": -0.23878300786018372,
"signal/accuracy_reward/centered_abs_mean": 0.089276123046875,
"signal/accuracy_reward/group_std_mean": 0.12189686745405197,
"signal/accuracy_reward/group_zero_std_frac": 0.634375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8961179256439209,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0446380615234375,
"signal/advantage_abs_mean": 0.7660392999649048,
"signal/advantage_pre_scale_abs_mean": 0.06024746969342232,
"signal/advantage_pre_scale_std": 0.0975036308169365,
"signal/advantage_std": 0.9825122117996216,
"signal/brier_reward/centered_abs_mean": 0.09333293437957764,
"signal/brier_reward/group_std_mean": 0.12096812278032303,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18763849139213562,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009333293326199055,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013957409746944904,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01862836182117462,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02798389606177807,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013957409886643291,
"signal/format_reward/centered_abs_mean": 0.00111083984375,
"signal/format_reward/group_std_mean": 0.0026419460773468018,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0111533023416996,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000555419921875,
"signal/frontier_coverage_0/centered_abs_mean": 0.14243515133857726,
"signal/frontier_coverage_0/group_std_mean": 0.1850941926240921,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041002404689788816,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002036822633817792,
"signal/frontier_coverage_1/centered_abs_mean": 0.14243515133857726,
"signal/frontier_coverage_1/group_std_mean": 0.1850941926240921,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041002404689788816,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002036822633817792,
"signal/frontier_coverage_10/centered_abs_mean": 0.14243515133857726,
"signal/frontier_coverage_10/group_std_mean": 0.1850941926240921,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.041002404689788816,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002036822633817792,
"signal/frontier_coverage_15/centered_abs_mean": 0.14243515133857726,
"signal/frontier_coverage_15/group_std_mean": 0.1850941926240921,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.041002404689788816,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002036822633817792,
"signal/frontier_coverage_20/centered_abs_mean": 0.14176848232746125,
"signal/frontier_coverage_20/group_std_mean": 0.1842800945043564,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.040808319300413134,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020272892899811266,
"signal/frontier_coverage_25/centered_abs_mean": 0.12744964957237243,
"signal/frontier_coverage_25/group_std_mean": 0.16639121770858764,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03667210936546326,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018225299660116433,
"signal/frontier_coverage_5/centered_abs_mean": 0.14243515133857726,
"signal/frontier_coverage_5/group_std_mean": 0.1850941926240921,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041002404689788816,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002036822633817792,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27840029299259184,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34748801589012146,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5590726673603058,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02784002907574177,
"step": 120
},
{
"calibration/aurc": 0.37680810561623346,
"calibration/batch_distribution_entropy": 0.977627751790393,
"calibration/buffer_distribution_entropy": 0.9969336700930927,
"calibration/confidence_entropy": 0.5016000149395057,
"calibration/coverage@0%": 0.01691244452662722,
"calibration/coverage@1%": 0.01691244452662722,
"calibration/coverage@10%": 0.08933062130177515,
"calibration/coverage@15%": 0.13830128205128206,
"calibration/coverage@20%": 0.1840930103550296,
"calibration/coverage@25%": 0.22992711415187378,
"calibration/coverage@30%": 0.288245808678501,
"calibration/coverage@5%": 0.03613011587771203,
"calibration/ece": 0.13497199447987554,
"calibration/mean_confidence": 0.5139445167093368,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00126953125,
"completions/max_length": 550.0,
"completions/max_terminated_length": 550.0,
"completions/mean_length": 213.1404296875,
"completions/mean_terminated_length": 213.41405029296874,
"completions/min_length": 66.8,
"completions/min_terminated_length": 86.4,
"epoch": 0.4,
"grad_norm": 0.0097922058776021,
"learning_rate": 1e-06,
"loss": -0.006,
"num_tokens": 413702409.0,
"reward": 0.9193836450576782,
"reward_std": 0.09198853373527527,
"rewards/accuracy_reward": 0.523828125,
"rewards/brier_reward": 0.7929557800292969,
"rewards/confidence_uniqueness_reward": 0.947076940536499,
"rewards/format_reward": 0.9984375,
"rewards/frontier_coverage_0": 0.12256665341556072,
"rewards/frontier_coverage_1": 0.12256665341556072,
"rewards/frontier_coverage_10": 0.12256665341556072,
"rewards/frontier_coverage_15": 0.12178453020751476,
"rewards/frontier_coverage_20": 0.119577931240201,
"rewards/frontier_coverage_25": 0.10327411331236362,
"rewards/frontier_coverage_5": 0.12256665341556072,
"rewards/frontier_entropy_batch_reward": -0.27691553235054017,
"signal/accuracy_reward/centered_abs_mean": 0.10191650390625,
"signal/accuracy_reward/group_std_mean": 0.1383904129266739,
"signal/accuracy_reward/group_zero_std_frac": 0.590625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8994345784187316,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.050958251953125,
"signal/advantage_abs_mean": 0.7682394504547119,
"signal/advantage_pre_scale_abs_mean": 0.07057161033153533,
"signal/advantage_pre_scale_std": 0.11377080827951432,
"signal/advantage_std": 0.9827415585517884,
"signal/brier_reward/centered_abs_mean": 0.10817324370145798,
"signal/brier_reward/group_std_mean": 0.13862178921699525,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19183254837989808,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010817324556410313,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.016499792411923408,
"signal/confidence_uniqueness_reward/group_std_mean": 0.022187639772892,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029168443754315376,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016499792458489538,
"signal/format_reward/centered_abs_mean": 0.00272216796875,
"signal/format_reward/group_std_mean": 0.0051541978027671576,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02258917409926653,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001361083984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.13868848383426666,
"signal/frontier_coverage_0/group_std_mean": 0.18039654791355134,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03508671894669533,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001983245322480798,
"signal/frontier_coverage_1/centered_abs_mean": 0.13868848383426666,
"signal/frontier_coverage_1/group_std_mean": 0.18039654791355134,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03508671894669533,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001983245322480798,
"signal/frontier_coverage_10/centered_abs_mean": 0.13868848383426666,
"signal/frontier_coverage_10/group_std_mean": 0.18039654791355134,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03508671894669533,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001983245322480798,
"signal/frontier_coverage_15/centered_abs_mean": 0.13756284713745118,
"signal/frontier_coverage_15/group_std_mean": 0.17895146012306212,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.034796612709760665,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019671486690640448,
"signal/frontier_coverage_20/centered_abs_mean": 0.1341120943427086,
"signal/frontier_coverage_20/group_std_mean": 0.17450543940067292,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03391275852918625,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019178029382601381,
"signal/frontier_coverage_25/centered_abs_mean": 0.10924447625875473,
"signal/frontier_coverage_25/group_std_mean": 0.14243850409984588,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02756657600402832,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015621959697455167,
"signal/frontier_coverage_5/centered_abs_mean": 0.13868848383426666,
"signal/frontier_coverage_5/group_std_mean": 0.18039654791355134,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03508671894669533,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001983245322480798,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30260345339775085,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.37170406579971316,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.538099491596222,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030260344967246056,
"step": 125
},
{
"calibration/aurc": 0.2657363718758238,
"calibration/batch_distribution_entropy": 0.9714411064590933,
"calibration/buffer_distribution_entropy": 0.9981619683864125,
"calibration/confidence_entropy": 0.4767882249726864,
"calibration/coverage@0%": 0.015234375,
"calibration/coverage@1%": 0.015234375,
"calibration/coverage@10%": 0.119140625,
"calibration/coverage@15%": 0.19296875,
"calibration/coverage@20%": 0.306640625,
"calibration/coverage@25%": 0.56015625,
"calibration/coverage@30%": 0.67890625,
"calibration/coverage@5%": 0.0484375,
"calibration/ece": 0.11560704005460878,
"calibration/mean_confidence": 0.5249319950650804,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 555.0,
"completions/max_terminated_length": 555.0,
"completions/mean_length": 218.605078125,
"completions/mean_terminated_length": 218.64696350097657,
"completions/min_length": 69.6,
"completions/min_terminated_length": 92.0,
"epoch": 0.416,
"grad_norm": 0.01152903400361538,
"learning_rate": 1e-06,
"loss": -0.0007,
"num_tokens": 430822109.0,
"reward": 0.9380086183547973,
"reward_std": 0.07931896895170212,
"rewards/accuracy_reward": 0.54072265625,
"rewards/brier_reward": 0.814470624923706,
"rewards/confidence_uniqueness_reward": 0.9511290192604065,
"rewards/format_reward": 0.999609375,
"rewards/frontier_coverage_0": 0.13615846037864685,
"rewards/frontier_coverage_1": 0.13615846037864685,
"rewards/frontier_coverage_10": 0.13615846037864685,
"rewards/frontier_coverage_15": 0.1354565665125847,
"rewards/frontier_coverage_20": 0.13337061703205108,
"rewards/frontier_coverage_25": 0.11261514723300933,
"rewards/frontier_coverage_5": 0.13615846037864685,
"rewards/frontier_entropy_batch_reward": -0.21960244774818422,
"signal/accuracy_reward/centered_abs_mean": 0.086480712890625,
"signal/accuracy_reward/group_std_mean": 0.11708700507879258,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8494049906730652,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0432403564453125,
"signal/advantage_abs_mean": 0.7665403366088868,
"signal/advantage_pre_scale_abs_mean": 0.06138978749513626,
"signal/advantage_pre_scale_std": 0.10005667060613632,
"signal/advantage_std": 0.9825314521789551,
"signal/brier_reward/centered_abs_mean": 0.09667001217603684,
"signal/brier_reward/group_std_mean": 0.12675763815641403,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19187456667423247,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009667001478374005,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013016079366207123,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016788151860237122,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02605282999575138,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013016079319640994,
"signal/format_reward/centered_abs_mean": 0.00072021484375,
"signal/format_reward/group_std_mean": 0.0014778789598494768,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.007402191683650017,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000360107421875,
"signal/frontier_coverage_0/centered_abs_mean": 0.14196341782808303,
"signal/frontier_coverage_0/group_std_mean": 0.18739462196826934,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039983388781547544,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002030076924711466,
"signal/frontier_coverage_1/centered_abs_mean": 0.14196341782808303,
"signal/frontier_coverage_1/group_std_mean": 0.18739462196826934,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039983388781547544,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002030076924711466,
"signal/frontier_coverage_10/centered_abs_mean": 0.14196341782808303,
"signal/frontier_coverage_10/group_std_mean": 0.18739462196826934,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039983388781547544,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002030076924711466,
"signal/frontier_coverage_15/centered_abs_mean": 0.14051727205514908,
"signal/frontier_coverage_15/group_std_mean": 0.1854836732149124,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0395765632390976,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020093969767913224,
"signal/frontier_coverage_20/centered_abs_mean": 0.13647186160087585,
"signal/frontier_coverage_20/group_std_mean": 0.1800833076238632,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03843574151396752,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019515476189553738,
"signal/frontier_coverage_25/centered_abs_mean": 0.10870475172996522,
"signal/frontier_coverage_25/group_std_mean": 0.1432620793581009,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030629120394587518,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015544779598712921,
"signal/frontier_coverage_5/centered_abs_mean": 0.14196341782808303,
"signal/frontier_coverage_5/group_std_mean": 0.18739462196826934,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.039983388781547544,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002030076924711466,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27345497012138364,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3447937786579132,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.545515489578247,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027345497533679007,
"step": 130
},
{
"calibration/aurc": 0.2394546361506189,
"calibration/batch_distribution_entropy": 0.9664220854755664,
"calibration/buffer_distribution_entropy": 0.9978692133192111,
"calibration/confidence_entropy": 0.44952975490033015,
"calibration/coverage@0%": 0.029296875,
"calibration/coverage@1%": 0.029296875,
"calibration/coverage@10%": 0.223046875,
"calibration/coverage@15%": 0.2796875,
"calibration/coverage@20%": 0.430078125,
"calibration/coverage@25%": 0.510546875,
"calibration/coverage@30%": 0.701171875,
"calibration/coverage@5%": 0.112890625,
"calibration/ece": 0.13110902721049844,
"calibration/mean_confidence": 0.5009625814429861,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 600.2,
"completions/max_terminated_length": 600.2,
"completions/mean_length": 220.32685546875,
"completions/mean_terminated_length": 220.3916229248047,
"completions/min_length": 44.4,
"completions/min_terminated_length": 109.8,
"epoch": 0.432,
"grad_norm": 0.014838258735835552,
"learning_rate": 1e-06,
"loss": -0.0003,
"num_tokens": 448092592.0,
"reward": 0.9469399571418762,
"reward_std": 0.07579587548971176,
"rewards/accuracy_reward": 0.56298828125,
"rewards/brier_reward": 0.8220736503601074,
"rewards/confidence_uniqueness_reward": 0.9500454664230347,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_coverage_0": 0.1361553490161896,
"rewards/frontier_coverage_1": 0.1361553490161896,
"rewards/frontier_coverage_10": 0.13602706864476205,
"rewards/frontier_coverage_15": 0.13480819016695023,
"rewards/frontier_coverage_20": 0.13133834376931192,
"rewards/frontier_coverage_25": 0.10667677372694015,
"rewards/frontier_coverage_5": 0.1361553490161896,
"rewards/frontier_entropy_batch_reward": -0.24737223386764526,
"signal/accuracy_reward/centered_abs_mean": 0.086553955078125,
"signal/accuracy_reward/group_std_mean": 0.11445238739252091,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9037783741950989,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0432769775390625,
"signal/advantage_abs_mean": 0.7740630865097046,
"signal/advantage_pre_scale_abs_mean": 0.05964174121618271,
"signal/advantage_pre_scale_std": 0.09494355022907257,
"signal/advantage_std": 0.9824240684509278,
"signal/brier_reward/centered_abs_mean": 0.09422143697738647,
"signal/brier_reward/group_std_mean": 0.12117904126644134,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19863314628601075,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009422143734991551,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013962790369987488,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018255869299173354,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029283100739121437,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013962790602818132,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00573458094149828,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_coverage_0/centered_abs_mean": 0.1540747344493866,
"signal/frontier_coverage_0/group_std_mean": 0.19681704938411712,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04627474918961525,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022032687440514563,
"signal/frontier_coverage_1/centered_abs_mean": 0.1540747344493866,
"signal/frontier_coverage_1/group_std_mean": 0.19681704938411712,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04627474918961525,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022032687440514563,
"signal/frontier_coverage_10/centered_abs_mean": 0.1538453459739685,
"signal/frontier_coverage_10/group_std_mean": 0.1965191125869751,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.046209176629781724,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002199988439679146,
"signal/frontier_coverage_15/centered_abs_mean": 0.15173088908195495,
"signal/frontier_coverage_15/group_std_mean": 0.19380154609680175,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.045577727258205414,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002169751701876521,
"signal/frontier_coverage_20/centered_abs_mean": 0.1440102219581604,
"signal/frontier_coverage_20/group_std_mean": 0.18400128185749054,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.043285074084997176,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020593460416421295,
"signal/frontier_coverage_25/centered_abs_mean": 0.10412507951259613,
"signal/frontier_coverage_25/group_std_mean": 0.13293323963880538,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03133079074323177,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014889885671436787,
"signal/frontier_coverage_5/centered_abs_mean": 0.1540747344493866,
"signal/frontier_coverage_5/group_std_mean": 0.19681704938411712,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04627474918961525,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022032687440514563,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2915738165378571,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36391377449035645,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6120963454246521,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029157382622361182,
"step": 135
},
{
"calibration/aurc": 0.24097328644526012,
"calibration/batch_distribution_entropy": 0.9634986379181794,
"calibration/buffer_distribution_entropy": 0.997582320879768,
"calibration/confidence_entropy": 0.49413236744092554,
"calibration/coverage@0%": 0.026953125,
"calibration/coverage@1%": 0.026953125,
"calibration/coverage@10%": 0.084375,
"calibration/coverage@15%": 0.22353458292563602,
"calibration/coverage@20%": 0.41814839163405093,
"calibration/coverage@25%": 0.5553043970156556,
"calibration/coverage@30%": 0.7393208781800391,
"calibration/coverage@5%": 0.04921875,
"calibration/ece": 0.09394564360698121,
"calibration/mean_confidence": 0.5647713994787259,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00078125,
"completions/max_length": 575.0,
"completions/max_terminated_length": 575.0,
"completions/mean_length": 226.0078125,
"completions/mean_terminated_length": 226.18872985839843,
"completions/min_length": 41.4,
"completions/min_terminated_length": 103.4,
"epoch": 0.448,
"grad_norm": 0.014536180533468723,
"learning_rate": 1e-06,
"loss": -0.0019,
"num_tokens": 465359712.0,
"reward": 0.934195339679718,
"reward_std": 0.08252922743558884,
"rewards/accuracy_reward": 0.5310546875,
"rewards/brier_reward": 0.8201260566711426,
"rewards/confidence_uniqueness_reward": 0.950800085067749,
"rewards/format_reward": 0.998828125,
"rewards/frontier_coverage_0": 0.14471644312143325,
"rewards/frontier_coverage_1": 0.14471644312143325,
"rewards/frontier_coverage_10": 0.14471644312143325,
"rewards/frontier_coverage_15": 0.14337524473667146,
"rewards/frontier_coverage_20": 0.13384944051504136,
"rewards/frontier_coverage_25": 0.10090558081865311,
"rewards/frontier_coverage_5": 0.14471644312143325,
"rewards/frontier_entropy_batch_reward": -0.21523725092411042,
"signal/accuracy_reward/centered_abs_mean": 0.091162109375,
"signal/accuracy_reward/group_std_mean": 0.11895354390144348,
"signal/accuracy_reward/group_zero_std_frac": 0.659375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8764581441879272,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0455810546875,
"signal/advantage_abs_mean": 0.7745669841766357,
"signal/advantage_pre_scale_abs_mean": 0.06429816111922264,
"signal/advantage_pre_scale_std": 0.10496852099895478,
"signal/advantage_std": 0.9825978994369506,
"signal/brier_reward/centered_abs_mean": 0.09663276970386506,
"signal/brier_reward/group_std_mean": 0.12589550763368607,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18589994609355925,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009663277119398118,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014237134903669357,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019735709950327873,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0274048775434494,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014237135415896772,
"signal/format_reward/centered_abs_mean": 0.00208740234375,
"signal/format_reward/group_std_mean": 0.004553806036710739,
"signal/format_reward/group_zero_std_frac": 0.978125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.020145339518785478,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.001043701171875,
"signal/frontier_coverage_0/centered_abs_mean": 0.1478554666042328,
"signal/frontier_coverage_0/group_std_mean": 0.18775065541267394,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.040648031234741214,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002114333095960319,
"signal/frontier_coverage_1/centered_abs_mean": 0.1478554666042328,
"signal/frontier_coverage_1/group_std_mean": 0.18775065541267394,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.040648031234741214,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002114333095960319,
"signal/frontier_coverage_10/centered_abs_mean": 0.1478554666042328,
"signal/frontier_coverage_10/group_std_mean": 0.18775065541267394,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.040648031234741214,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002114333095960319,
"signal/frontier_coverage_15/centered_abs_mean": 0.14521766006946563,
"signal/frontier_coverage_15/group_std_mean": 0.18448179364204406,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.039922721683979034,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020766125060617925,
"signal/frontier_coverage_20/centered_abs_mean": 0.12875569313764573,
"signal/frontier_coverage_20/group_std_mean": 0.16414850652217866,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035395897924900055,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00184120642952621,
"signal/frontier_coverage_25/centered_abs_mean": 0.08961157202720642,
"signal/frontier_coverage_25/group_std_mean": 0.11496616899967194,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0246336467564106,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012814455199986695,
"signal/frontier_coverage_5/centered_abs_mean": 0.1478554666042328,
"signal/frontier_coverage_5/group_std_mean": 0.18775065541267394,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.040648031234741214,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002114333095960319,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27715436220169065,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3507562756538391,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5331802070140839,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027715435624122618,
"step": 140
},
{
"calibration/aurc": 0.34746743831006704,
"calibration/batch_distribution_entropy": 0.9729663060399908,
"calibration/buffer_distribution_entropy": 0.9974155039324156,
"calibration/confidence_entropy": 0.5069634313783685,
"calibration/coverage@0%": 0.007032778864970645,
"calibration/coverage@1%": 0.007032778864970645,
"calibration/coverage@10%": 0.007814028864970646,
"calibration/coverage@15%": 0.050782778864970644,
"calibration/coverage@20%": 0.11487432729941291,
"calibration/coverage@25%": 0.3137460249510763,
"calibration/coverage@30%": 0.44777091487279846,
"calibration/coverage@5%": 0.007032778864970645,
"calibration/ece": 0.12659113833545074,
"calibration/mean_confidence": 0.48351704059525724,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 612.2,
"completions/max_terminated_length": 612.2,
"completions/mean_length": 226.98642578125,
"completions/mean_terminated_length": 227.0522918701172,
"completions/min_length": 42.8,
"completions/min_terminated_length": 108.8,
"epoch": 0.464,
"grad_norm": 0.011709939688444138,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 482854869.0,
"reward": 0.9039804100990295,
"reward_std": 0.07382192313671113,
"rewards/accuracy_reward": 0.47568359375,
"rewards/brier_reward": 0.7968503117561341,
"rewards/confidence_uniqueness_reward": 0.9505188941955567,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_coverage_0": 0.158881214261055,
"rewards/frontier_coverage_1": 0.158881214261055,
"rewards/frontier_coverage_10": 0.158881214261055,
"rewards/frontier_coverage_15": 0.1568503975868225,
"rewards/frontier_coverage_20": 0.13968808948993683,
"rewards/frontier_coverage_25": 0.09043478444218636,
"rewards/frontier_coverage_5": 0.158881214261055,
"rewards/frontier_entropy_batch_reward": -0.2307354539632797,
"signal/accuracy_reward/centered_abs_mean": 0.072039794921875,
"signal/accuracy_reward/group_std_mean": 0.09741799160838127,
"signal/accuracy_reward/group_zero_std_frac": 0.7125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7294652104377747,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0360198974609375,
"signal/advantage_abs_mean": 0.765180766582489,
"signal/advantage_pre_scale_abs_mean": 0.05663144513964653,
"signal/advantage_pre_scale_std": 0.09198210388422012,
"signal/advantage_std": 0.9824758410453797,
"signal/brier_reward/centered_abs_mean": 0.10376063138246536,
"signal/brier_reward/group_std_mean": 0.1336098790168762,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21310822963714598,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010376062802970409,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013512828387320042,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017725981958210468,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02775384560227394,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001351282838732004,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005904573574662208,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_coverage_0/centered_abs_mean": 0.14577461481094361,
"signal/frontier_coverage_0/group_std_mean": 0.18616759181022643,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.042809315770864484,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020845770370215176,
"signal/frontier_coverage_1/centered_abs_mean": 0.14577461481094361,
"signal/frontier_coverage_1/group_std_mean": 0.18616759181022643,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.042809315770864484,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020845770370215176,
"signal/frontier_coverage_10/centered_abs_mean": 0.14577461481094361,
"signal/frontier_coverage_10/group_std_mean": 0.18616759181022643,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.042809315770864484,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020845770370215176,
"signal/frontier_coverage_15/centered_abs_mean": 0.1432782530784607,
"signal/frontier_coverage_15/group_std_mean": 0.1829966723918915,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04207362085580826,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002048878977075219,
"signal/frontier_coverage_20/centered_abs_mean": 0.12409319430589676,
"signal/frontier_coverage_20/group_std_mean": 0.15879313945770263,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03639562539756298,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017745327437296509,
"signal/frontier_coverage_25/centered_abs_mean": 0.08080256581306458,
"signal/frontier_coverage_25/group_std_mean": 0.10407637059688568,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023712591081857682,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011554765980690718,
"signal/frontier_coverage_5/centered_abs_mean": 0.14577461481094361,
"signal/frontier_coverage_5/group_std_mean": 0.18616759181022643,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.042809315770864484,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020845770370215176,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27938825488090513,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3512773871421814,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.568844985961914,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027938826382160185,
"step": 145
},
{
"calibration/aurc": 0.2291198538847199,
"calibration/batch_distribution_entropy": 0.9672892013539134,
"calibration/buffer_distribution_entropy": 0.9967666576020949,
"calibration/confidence_entropy": 0.4510841710064442,
"calibration/coverage@0%": 0.040251192514677105,
"calibration/coverage@1%": 0.040251192514677105,
"calibration/coverage@10%": 0.19861867049902152,
"calibration/coverage@15%": 0.3887743089530332,
"calibration/coverage@20%": 0.45448492539138946,
"calibration/coverage@25%": 0.5873960371819961,
"calibration/coverage@30%": 0.7125351638943249,
"calibration/coverage@5%": 0.0930390777886497,
"calibration/ece": 0.12407148786161874,
"calibration/mean_confidence": 0.4931261034620563,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 617.6,
"completions/max_terminated_length": 617.6,
"completions/mean_length": 216.017578125,
"completions/mean_terminated_length": 216.14576416015626,
"completions/min_length": 40.8,
"completions/min_terminated_length": 106.8,
"epoch": 0.48,
"grad_norm": 0.017295807600021362,
"learning_rate": 1e-06,
"loss": 0.0016,
"num_tokens": 500114921.0,
"reward": 0.9307031989097595,
"reward_std": 0.08427495062351227,
"rewards/accuracy_reward": 0.53408203125,
"rewards/brier_reward": 0.8068124175071716,
"rewards/confidence_uniqueness_reward": 0.949408769607544,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_coverage_0": 0.14723927676677703,
"rewards/frontier_coverage_1": 0.14723927676677703,
"rewards/frontier_coverage_10": 0.14699542224407197,
"rewards/frontier_coverage_15": 0.14517690539360045,
"rewards/frontier_coverage_20": 0.12480606287717819,
"rewards/frontier_coverage_25": 0.08327750265598297,
"rewards/frontier_coverage_5": 0.14723927676677703,
"rewards/frontier_entropy_batch_reward": -0.25137184262275697,
"signal/accuracy_reward/centered_abs_mean": 0.104425048828125,
"signal/accuracy_reward/group_std_mean": 0.13805123120546342,
"signal/accuracy_reward/group_zero_std_frac": 0.603125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0096543788909913,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0522125244140625,
"signal/advantage_abs_mean": 0.7595040321350097,
"signal/advantage_pre_scale_abs_mean": 0.06471362709999084,
"signal/advantage_pre_scale_std": 0.1039919227361679,
"signal/advantage_std": 0.9825853824615478,
"signal/brier_reward/centered_abs_mean": 0.10939399302005767,
"signal/brier_reward/group_std_mean": 0.1398113638162613,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21181057393550873,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010939398780465126,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01502783726900816,
"signal/confidence_uniqueness_reward/group_std_mean": 0.02021808587014675,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02905021458864212,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015027837362140417,
"signal/format_reward/centered_abs_mean": 0.001123046875,
"signal/format_reward/group_std_mean": 0.0029782545287162067,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010594680532813072,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0005615234375,
"signal/frontier_coverage_0/centered_abs_mean": 0.17547143697738649,
"signal/frontier_coverage_0/group_std_mean": 0.22376441955566406,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04860707297921181,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025092415045946835,
"signal/frontier_coverage_1/centered_abs_mean": 0.17547143697738649,
"signal/frontier_coverage_1/group_std_mean": 0.22376441955566406,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04860707297921181,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025092415045946835,
"signal/frontier_coverage_10/centered_abs_mean": 0.175068262219429,
"signal/frontier_coverage_10/group_std_mean": 0.22327833473682404,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.048494862765073775,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025034761521965264,
"signal/frontier_coverage_15/centered_abs_mean": 0.17082957327365875,
"signal/frontier_coverage_15/group_std_mean": 0.21801035106182098,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04731718450784683,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024428628385066986,
"signal/frontier_coverage_20/centered_abs_mean": 0.13655193746089936,
"signal/frontier_coverage_20/group_std_mean": 0.1751394361257553,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03779491558670998,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001952692656777799,
"signal/frontier_coverage_25/centered_abs_mean": 0.07820582389831543,
"signal/frontier_coverage_25/group_std_mean": 0.1008089080452919,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021633072569966317,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011183432419784368,
"signal/frontier_coverage_5/centered_abs_mean": 0.17547143697738649,
"signal/frontier_coverage_5/group_std_mean": 0.22376441955566406,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04860707297921181,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025092415045946835,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29524141550064087,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.366878867149353,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.570873761177063,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029524140805006028,
"step": 150
},
{
"epoch": 0.48,
"eval_calibration/aurc": 0.45534531043154974,
"eval_calibration/batch_distribution_entropy": 0.926511097298464,
"eval_calibration/buffer_distribution_entropy": 0.9962565022453381,
"eval_calibration/confidence_entropy": 0.4713693076099079,
"eval_calibration/coverage@0%": 0.109375,
"eval_calibration/coverage@1%": 0.109375,
"eval_calibration/coverage@10%": 0.140625,
"eval_calibration/coverage@15%": 0.140625,
"eval_calibration/coverage@20%": 0.171875,
"eval_calibration/coverage@25%": 0.2265625,
"eval_calibration/coverage@30%": 0.2578125,
"eval_calibration/coverage@5%": 0.109375,
"eval_calibration/ece": 0.16938003565903828,
"eval_calibration/mean_confidence": 0.4290637407118039,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 378.5,
"eval_completions/max_terminated_length": 378.5,
"eval_completions/mean_length": 212.3056983947754,
"eval_completions/mean_terminated_length": 212.3056983947754,
"eval_completions/min_length": 127.75,
"eval_completions/min_terminated_length": 127.75,
"eval_loss": 0.0,
"eval_num_tokens": 500114921.0,
"eval_reward": 0.8005315959453583,
"eval_reward_std": 0.21906593441963196,
"eval_rewards/accuracy_reward": 0.421875,
"eval_rewards/brier_reward": 0.8141729980707169,
"eval_rewards/confidence_uniqueness_reward": 0.89990234375,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_coverage_0": 0.21159575507044792,
"eval_rewards/frontier_coverage_1": 0.21159575507044792,
"eval_rewards/frontier_coverage_10": 0.2111768200993538,
"eval_rewards/frontier_coverage_15": 0.2013298012316227,
"eval_rewards/frontier_coverage_20": 0.14685893058776855,
"eval_rewards/frontier_coverage_25": 0.07763573154807091,
"eval_rewards/frontier_coverage_5": 0.21159575507044792,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 20.4621,
"eval_samples_per_second": 24.435,
"eval_signal/accuracy_reward/centered_abs_mean": 0.467529296875,
"eval_signal/accuracy_reward/group_std_mean": 0.49060849100351334,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0691617131233215,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2337646484375,
"eval_signal/advantage_abs_mean": 0.9310326725244522,
"eval_signal/advantage_pre_scale_abs_mean": 0.2045309916138649,
"eval_signal/advantage_pre_scale_std": 0.21670874953269958,
"eval_signal/advantage_std": 0.9876660853624344,
"eval_signal/brier_reward/centered_abs_mean": 0.1706998273730278,
"eval_signal/brier_reward/group_std_mean": 0.21492478251457214,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07832564786076546,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01706998236477375,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.03839111328125,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04556787060573697,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017674416303634644,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038391113048419356,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.3698278069496155,
"eval_signal/frontier_coverage_0/group_std_mean": 0.44138026237487793,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.024246441666036844,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005288537475280464,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.3698278069496155,
"eval_signal/frontier_coverage_1/group_std_mean": 0.44138026237487793,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.024246441666036844,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005288537475280464,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3684462159872055,
"eval_signal/frontier_coverage_10/group_std_mean": 0.43977469205856323,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.024155837018042803,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005268780863843858,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.35635514557361603,
"eval_signal/frontier_coverage_15/group_std_mean": 0.42567581683397293,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.023364387918263674,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00509587861597538,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.26486819609999657,
"eval_signal/frontier_coverage_20/group_std_mean": 0.32019487768411636,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01737035741098225,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003787615045439452,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.12020084448158741,
"eval_signal/frontier_coverage_25/group_std_mean": 0.14966701343655586,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007880826713517308,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017188721103593707,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3698278069496155,
"eval_signal/frontier_coverage_5/group_std_mean": 0.44138026237487793,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.024246441666036844,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005288537475280464,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.195,
"step": 150
},
{
"calibration/aurc": 0.3319640447041089,
"calibration/batch_distribution_entropy": 0.9701258176485588,
"calibration/buffer_distribution_entropy": 0.9959642411136971,
"calibration/confidence_entropy": 0.46898921326255544,
"calibration/coverage@0%": 0.010546875,
"calibration/coverage@1%": 0.010546875,
"calibration/coverage@10%": 0.173046875,
"calibration/coverage@15%": 0.216015625,
"calibration/coverage@20%": 0.25234375,
"calibration/coverage@25%": 0.371484375,
"calibration/coverage@30%": 0.487109375,
"calibration/coverage@5%": 0.084765625,
"calibration/ece": 0.1387425856761773,
"calibration/mean_confidence": 0.48812744764793126,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 518.8,
"completions/max_terminated_length": 518.8,
"completions/mean_length": 208.6595703125,
"completions/mean_terminated_length": 208.6799102783203,
"completions/min_length": 88.4,
"completions/min_terminated_length": 109.2,
"epoch": 0.496,
"grad_norm": 0.013091221451759338,
"learning_rate": 1e-06,
"loss": -0.0006,
"num_tokens": 517559435.0,
"reward": 0.9493598937988281,
"reward_std": 0.07383857369422912,
"rewards/accuracy_reward": 0.57119140625,
"rewards/brier_reward": 0.8089569687843323,
"rewards/confidence_uniqueness_reward": 0.9516295194625854,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_coverage_0": 0.10690305382013321,
"rewards/frontier_coverage_1": 0.10690305382013321,
"rewards/frontier_coverage_10": 0.10668100267648697,
"rewards/frontier_coverage_15": 0.10049253255128861,
"rewards/frontier_coverage_20": 0.07734967768192291,
"rewards/frontier_coverage_25": 0.05591387003660202,
"rewards/frontier_coverage_5": 0.10690305382013321,
"rewards/frontier_entropy_batch_reward": -0.21602373123168944,
"signal/accuracy_reward/centered_abs_mean": 0.072503662109375,
"signal/accuracy_reward/group_std_mean": 0.10430038273334503,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7448198437690735,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0362518310546875,
"signal/advantage_abs_mean": 0.7576547145843506,
"signal/advantage_pre_scale_abs_mean": 0.055284418165683746,
"signal/advantage_pre_scale_std": 0.09131110310554505,
"signal/advantage_std": 0.9824570298194886,
"signal/brier_reward/centered_abs_mean": 0.09475551843643189,
"signal/brier_reward/group_std_mean": 0.1237260028719902,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19527413845062255,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009475551731884479,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01260056346654892,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016603745333850384,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025821058079600334,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012600563932210207,
"signal/format_reward/centered_abs_mean": 0.000567626953125,
"signal/format_reward/group_std_mean": 0.0016572814900428056,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005250536277890206,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625,
"signal/frontier_coverage_0/centered_abs_mean": 0.13314241766929627,
"signal/frontier_coverage_0/group_std_mean": 0.17539192140102386,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03925930708646774,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019039364997297525,
"signal/frontier_coverage_1/centered_abs_mean": 0.13314241766929627,
"signal/frontier_coverage_1/group_std_mean": 0.17539192140102386,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03925930708646774,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019039364997297525,
"signal/frontier_coverage_10/centered_abs_mean": 0.1326947808265686,
"signal/frontier_coverage_10/group_std_mean": 0.17478241324424743,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039127344638109206,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018975354731082916,
"signal/frontier_coverage_15/centered_abs_mean": 0.12710427790880202,
"signal/frontier_coverage_15/group_std_mean": 0.1676137626171112,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.037500803172588346,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018175912089645862,
"signal/frontier_coverage_20/centered_abs_mean": 0.09209925383329391,
"signal/frontier_coverage_20/group_std_mean": 0.12147116661071777,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027178560569882392,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013170193182304502,
"signal/frontier_coverage_25/centered_abs_mean": 0.055260706692934036,
"signal/frontier_coverage_25/group_std_mean": 0.07163113951683045,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016299421340227126,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007902280893176794,
"signal/frontier_coverage_5/centered_abs_mean": 0.13314241766929627,
"signal/frontier_coverage_5/group_std_mean": 0.17539192140102386,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03925930708646774,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019039364997297525,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26992714703083037,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3414120674133301,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5531856119632721,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026992715150117873,
"step": 155
},
{
"calibration/aurc": 0.2318834156944694,
"calibration/batch_distribution_entropy": 0.9758865214113979,
"calibration/buffer_distribution_entropy": 0.9960580008108133,
"calibration/confidence_entropy": 0.5127858783596624,
"calibration/coverage@0%": 0.04804840386497065,
"calibration/coverage@1%": 0.04804840386497065,
"calibration/coverage@10%": 0.39586442025440316,
"calibration/coverage@15%": 0.46035806017612524,
"calibration/coverage@20%": 0.5201489114481409,
"calibration/coverage@25%": 0.5729023972602739,
"calibration/coverage@30%": 0.676439426369863,
"calibration/coverage@5%": 0.2101577788649706,
"calibration/ece": 0.13026999208636952,
"calibration/mean_confidence": 0.5039464087666106,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 475.4,
"completions/max_terminated_length": 475.4,
"completions/mean_length": 201.43544921875,
"completions/mean_terminated_length": 201.43544921875,
"completions/min_length": 109.4,
"completions/min_terminated_length": 109.4,
"epoch": 0.512,
"grad_norm": 0.01248211320489645,
"learning_rate": 1e-06,
"loss": 0.0039,
"num_tokens": 534767798.0,
"reward": 0.9442027688026429,
"reward_std": 0.07601016610860825,
"rewards/accuracy_reward": 0.55,
"rewards/brier_reward": 0.831080436706543,
"rewards/confidence_uniqueness_reward": 0.9524983644485474,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.13253318071365355,
"rewards/frontier_coverage_1": 0.13253318071365355,
"rewards/frontier_coverage_10": 0.1322164461016655,
"rewards/frontier_coverage_15": 0.1266620621085167,
"rewards/frontier_coverage_20": 0.08994849994778634,
"rewards/frontier_coverage_25": 0.058000007271766664,
"rewards/frontier_coverage_5": 0.13251669555902482,
"rewards/frontier_entropy_batch_reward": -0.20609368085861207,
"signal/accuracy_reward/centered_abs_mean": 0.0776611328125,
"signal/accuracy_reward/group_std_mean": 0.1082550346851349,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7978426694869996,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03883056640625,
"signal/advantage_abs_mean": 0.7590952754020691,
"signal/advantage_pre_scale_abs_mean": 0.05769476890563965,
"signal/advantage_pre_scale_std": 0.09525633007287979,
"signal/advantage_std": 0.9824547410011292,
"signal/brier_reward/centered_abs_mean": 0.08847891539335251,
"signal/brier_reward/group_std_mean": 0.11509132534265518,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18230343163013457,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.008847891353070736,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011811437085270882,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015052905678749085,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024400829523801803,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001181143708527088,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018382035195827484,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1262390971183777,
"signal/frontier_coverage_0/group_std_mean": 0.16516108214855194,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037380128353834155,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018052191007882357,
"signal/frontier_coverage_1/centered_abs_mean": 0.1262390971183777,
"signal/frontier_coverage_1/group_std_mean": 0.16516108214855194,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037380128353834155,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018052191007882357,
"signal/frontier_coverage_10/centered_abs_mean": 0.12583804428577422,
"signal/frontier_coverage_10/group_std_mean": 0.16464310884475708,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.037261802703142166,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017994840862229466,
"signal/frontier_coverage_15/centered_abs_mean": 0.11806275993585587,
"signal/frontier_coverage_15/group_std_mean": 0.15434040427207946,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0349416546523571,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016882974887266755,
"signal/frontier_coverage_20/centered_abs_mean": 0.07932479679584503,
"signal/frontier_coverage_20/group_std_mean": 0.10407408773899078,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02345772311091423,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011343445628881454,
"signal/frontier_coverage_25/centered_abs_mean": 0.04835866242647171,
"signal/frontier_coverage_25/group_std_mean": 0.062165239453315736,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01430096197873354,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006915288628078997,
"signal/frontier_coverage_5/centered_abs_mean": 0.1262181043624878,
"signal/frontier_coverage_5/group_std_mean": 0.1651339590549469,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03737393617630005,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001804918935522437,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2682627737522125,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3407862842082977,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5545450925827027,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02682627774775028,
"step": 160
},
{
"calibration/aurc": 0.19211939730195765,
"calibration/batch_distribution_entropy": 0.9900490234949414,
"calibration/buffer_distribution_entropy": 0.9960697114973808,
"calibration/confidence_entropy": 0.48208753862210135,
"calibration/coverage@0%": 0.042578125,
"calibration/coverage@1%": 0.05625,
"calibration/coverage@10%": 0.394921875,
"calibration/coverage@15%": 0.476953125,
"calibration/coverage@20%": 0.626171875,
"calibration/coverage@25%": 0.705859375,
"calibration/coverage@30%": 0.78203125,
"calibration/coverage@5%": 0.172265625,
"calibration/ece": 0.13546201731264892,
"calibration/mean_confidence": 0.503985014979136,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 702.2,
"completions/max_terminated_length": 702.2,
"completions/mean_length": 200.08134765625,
"completions/mean_terminated_length": 200.08134765625,
"completions/min_length": 113.8,
"completions/min_terminated_length": 113.8,
"epoch": 0.528,
"grad_norm": 0.013641457073390484,
"learning_rate": 1e-06,
"loss": 0.0023,
"num_tokens": 551846167.0,
"reward": 0.9402725458145141,
"reward_std": 0.0775743842124939,
"rewards/accuracy_reward": 0.54189453125,
"rewards/brier_reward": 0.8286908626556396,
"rewards/confidence_uniqueness_reward": 0.9521965026855469,
"rewards/format_reward": 1.0,
"rewards/frontier_coverage_0": 0.15275048613548278,
"rewards/frontier_coverage_1": 0.15275048613548278,
"rewards/frontier_coverage_10": 0.1523078754544258,
"rewards/frontier_coverage_15": 0.14277659058570863,
"rewards/frontier_coverage_20": 0.10134280398488045,
"rewards/frontier_coverage_25": 0.06609501764178276,
"rewards/frontier_coverage_5": 0.15271973311901094,
"rewards/frontier_entropy_batch_reward": -0.21930084824562074,
"signal/accuracy_reward/centered_abs_mean": 0.089093017578125,
"signal/accuracy_reward/group_std_mean": 0.12107728868722915,
"signal/accuracy_reward/group_zero_std_frac": 0.640625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9205329418182373,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0445465087890625,
"signal/advantage_abs_mean": 0.7600311875343323,
"signal/advantage_pre_scale_abs_mean": 0.05986330807209015,
"signal/advantage_pre_scale_std": 0.09803989231586456,
"signal/advantage_std": 0.9824413180351257,
"signal/brier_reward/centered_abs_mean": 0.08963337987661361,
"signal/brier_reward/group_std_mean": 0.11848148554563523,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18603391349315643,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.0089633384719491,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011856555938720703,
"signal/confidence_uniqueness_reward/group_std_mean": 0.014830333553254605,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024678315967321396,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011856555938720703,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_0/centered_abs_mean": 0.14773441553115846,
"signal/frontier_coverage_0/group_std_mean": 0.19397561848163605,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04386069774627686,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021126021165400743,
"signal/frontier_coverage_1/centered_abs_mean": 0.14773441553115846,
"signal/frontier_coverage_1/group_std_mean": 0.19397561848163605,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04386069774627686,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021126021165400743,
"signal/frontier_coverage_10/centered_abs_mean": 0.14719023555517197,
"signal/frontier_coverage_10/group_std_mean": 0.19325293004512786,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.043698471039533615,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002104820357635617,
"signal/frontier_coverage_15/centered_abs_mean": 0.13356127738952636,
"signal/frontier_coverage_15/group_std_mean": 0.17507249414920806,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03960662260651589,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019099263940006495,
"signal/frontier_coverage_20/centered_abs_mean": 0.08548935353755951,
"signal/frontier_coverage_20/group_std_mean": 0.11230573058128357,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025287511199712752,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012224977603182197,
"signal/frontier_coverage_25/centered_abs_mean": 0.05132223665714264,
"signal/frontier_coverage_25/group_std_mean": 0.06637111082673072,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01524107065051794,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007339079747907817,
"signal/frontier_coverage_5/centered_abs_mean": 0.14769805371761321,
"signal/frontier_coverage_5/group_std_mean": 0.1939283013343811,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.043849749863147734,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021120821125805377,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2675458133220673,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34275596737861636,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5556636452674866,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026754581928253175,
"step": 165
},
{
"calibration/aurc": 0.20294509108922654,
"calibration/batch_distribution_entropy": 0.9546776256074434,
"calibration/buffer_distribution_entropy": 0.9954494699983767,
"calibration/confidence_entropy": 0.43927290283615655,
"calibration/coverage@0%": 0.02265625,
"calibration/coverage@1%": 0.02265625,
"calibration/coverage@10%": 0.183984375,
"calibration/coverage@15%": 0.3640625,
"calibration/coverage@20%": 0.64375,
"calibration/coverage@25%": 0.7265625,
"calibration/coverage@30%": 0.79375,
"calibration/coverage@5%": 0.0859375,
"calibration/ece": 0.08818259891898914,
"calibration/mean_confidence": 0.5408140311130343,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 506.0,
"completions/max_terminated_length": 506.0,
"completions/mean_length": 199.5048828125,
"completions/mean_terminated_length": 199.52403869628907,
"completions/min_length": 93.0,
"completions/min_terminated_length": 114.4,
"epoch": 0.544,
"grad_norm": 0.012110423296689987,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 569052681.0,
"reward": 0.9582688570022583,
"reward_std": 0.07946361750364303,
"rewards/accuracy_reward": 0.59326171875,
"rewards/brier_reward": 0.8115216851234436,
"rewards/confidence_uniqueness_reward": 0.951054048538208,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.09490841627120972,
"rewards/frontier_coverage_1": 0.09490841627120972,
"rewards/frontier_coverage_10": 0.09457989484071731,
"rewards/frontier_coverage_15": 0.08632062524557113,
"rewards/frontier_coverage_20": 0.06573501899838448,
"rewards/frontier_coverage_25": 0.06225412338972092,
"rewards/frontier_coverage_5": 0.09490256607532502,
"rewards/frontier_entropy_batch_reward": -0.23010531365871428,
"signal/accuracy_reward/centered_abs_mean": 0.091876220703125,
"signal/accuracy_reward/group_std_mean": 0.1262803852558136,
"signal/accuracy_reward/group_zero_std_frac": 0.61875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8946531057357788,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0459381103515625,
"signal/advantage_abs_mean": 0.7608879446983338,
"signal/advantage_pre_scale_abs_mean": 0.0603870801627636,
"signal/advantage_pre_scale_std": 0.0982098788022995,
"signal/advantage_std": 0.9825691699981689,
"signal/brier_reward/centered_abs_mean": 0.0995179459452629,
"signal/brier_reward/group_std_mean": 0.1290470004081726,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1940900981426239,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009951795265078545,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012855185754597187,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016486688517034054,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025106297805905342,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001285518566146493,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003780721127986908,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.14924255907535552,
"signal/frontier_coverage_0/group_std_mean": 0.19254024922847748,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041698559373617175,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00213416856713593,
"signal/frontier_coverage_1/centered_abs_mean": 0.14924255907535552,
"signal/frontier_coverage_1/group_std_mean": 0.19254024922847748,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041698559373617175,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00213416856713593,
"signal/frontier_coverage_10/centered_abs_mean": 0.14833201169967652,
"signal/frontier_coverage_10/group_std_mean": 0.19136776328086852,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04144668877124787,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021211476530879738,
"signal/frontier_coverage_15/centered_abs_mean": 0.132179157435894,
"signal/frontier_coverage_15/group_std_mean": 0.17049570083618165,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.036941982060670855,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018901619594544172,
"signal/frontier_coverage_20/centered_abs_mean": 0.08309006989002228,
"signal/frontier_coverage_20/group_std_mean": 0.10664766877889634,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023256586492061616,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011881879763677716,
"signal/frontier_coverage_25/centered_abs_mean": 0.054529760777950284,
"signal/frontier_coverage_25/group_std_mean": 0.06911371499300004,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015250799432396888,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007797755766659975,
"signal/frontier_coverage_5/centered_abs_mean": 0.149216166138649,
"signal/frontier_coverage_5/group_std_mean": 0.19250675439834594,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041691217571496964,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002133791148662567,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2766104400157928,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3481856346130371,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5387953042984008,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027661043778061868,
"step": 170
},
{
"calibration/aurc": 0.22822852984902148,
"calibration/batch_distribution_entropy": 0.9655225762120716,
"calibration/buffer_distribution_entropy": 0.9952540292473733,
"calibration/confidence_entropy": 0.49669730637340487,
"calibration/coverage@0%": 0.134765625,
"calibration/coverage@1%": 0.2109375,
"calibration/coverage@10%": 0.37265625,
"calibration/coverage@15%": 0.41640625,
"calibration/coverage@20%": 0.46875,
"calibration/coverage@25%": 0.50703125,
"calibration/coverage@30%": 0.580859375,
"calibration/coverage@5%": 0.28359375,
"calibration/ece": 0.13059685649729424,
"calibration/mean_confidence": 0.4859837007115013,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 445.4,
"completions/max_terminated_length": 445.4,
"completions/mean_length": 201.94296875,
"completions/mean_terminated_length": 202.03963623046874,
"completions/min_length": 43.0,
"completions/min_terminated_length": 110.0,
"epoch": 0.56,
"grad_norm": 0.011170346289873123,
"learning_rate": 1e-06,
"loss": -0.0019,
"num_tokens": 585941985.0,
"reward": 0.9336464762687683,
"reward_std": 0.07471658736467361,
"rewards/accuracy_reward": 0.537890625,
"rewards/brier_reward": 0.8261852979660034,
"rewards/confidence_uniqueness_reward": 0.9501984477043152,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_coverage_0": 0.14562440067529678,
"rewards/frontier_coverage_1": 0.14562440067529678,
"rewards/frontier_coverage_10": 0.14465759322047234,
"rewards/frontier_coverage_15": 0.12708725333213805,
"rewards/frontier_coverage_20": 0.0821958489716053,
"rewards/frontier_coverage_25": 0.05846571922302246,
"rewards/frontier_coverage_5": 0.14558007940649986,
"rewards/frontier_entropy_batch_reward": -0.24837148189544678,
"signal/accuracy_reward/centered_abs_mean": 0.07193603515625,
"signal/accuracy_reward/group_std_mean": 0.10199806988239288,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7364301919937134,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035968017578125,
"signal/advantage_abs_mean": 0.7664315819740295,
"signal/advantage_pre_scale_abs_mean": 0.05653882250189781,
"signal/advantage_pre_scale_std": 0.09310483485460282,
"signal/advantage_std": 0.9824739694595337,
"signal/brier_reward/centered_abs_mean": 0.08978293836116791,
"signal/brier_reward/group_std_mean": 0.11686733067035675,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18375783562660217,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.008978294022381306,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013617974147200585,
"signal/confidence_uniqueness_reward/group_std_mean": 0.018551425263285636,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027894172444939615,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013617974007502198,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009561251290142537,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_coverage_0/centered_abs_mean": 0.13698640018701552,
"signal/frontier_coverage_0/group_std_mean": 0.1777627319097519,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04013899490237236,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001958905439823866,
"signal/frontier_coverage_1/centered_abs_mean": 0.13698640018701552,
"signal/frontier_coverage_1/group_std_mean": 0.1777627319097519,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04013899490237236,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001958905439823866,
"signal/frontier_coverage_10/centered_abs_mean": 0.13580810874700547,
"signal/frontier_coverage_10/group_std_mean": 0.17621684074401855,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03979276791214943,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019420559052377938,
"signal/frontier_coverage_15/centered_abs_mean": 0.11728577166795731,
"signal/frontier_coverage_15/group_std_mean": 0.15197210609912873,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03433753401041031,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016771864844486116,
"signal/frontier_coverage_20/centered_abs_mean": 0.06963706314563751,
"signal/frontier_coverage_20/group_std_mean": 0.09059495776891709,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020375318080186843,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000995810003951192,
"signal/frontier_coverage_25/centered_abs_mean": 0.046510016173124315,
"signal/frontier_coverage_25/group_std_mean": 0.06020733863115311,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013616615161299705,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006650932133197785,
"signal/frontier_coverage_5/centered_abs_mean": 0.13694753050804137,
"signal/frontier_coverage_5/group_std_mean": 0.17771164178848267,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04012744650244713,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019583496730774643,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2976150155067444,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36509778499603274,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6098406314849854,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029761501774191856,
"step": 175
},
{
"calibration/aurc": 0.25513026872028377,
"calibration/batch_distribution_entropy": 0.9604550999332429,
"calibration/buffer_distribution_entropy": 0.9955296544792447,
"calibration/confidence_entropy": 0.4595333190087679,
"calibration/coverage@0%": 0.044537365459882586,
"calibration/coverage@1%": 0.044537365459882586,
"calibration/coverage@10%": 0.19186796722113503,
"calibration/coverage@15%": 0.30011848703522503,
"calibration/coverage@20%": 0.3857035836594912,
"calibration/coverage@25%": 0.5373417624755381,
"calibration/coverage@30%": 0.6647673067514678,
"calibration/coverage@5%": 0.1257873654598826,
"calibration/ece": 0.10435565679611791,
"calibration/mean_confidence": 0.47617095896785244,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 481.8,
"completions/max_terminated_length": 481.8,
"completions/mean_length": 219.14482421875,
"completions/mean_terminated_length": 219.14482421875,
"completions/min_length": 99.0,
"completions/min_terminated_length": 99.0,
"epoch": 0.576,
"grad_norm": 0.01333346776664257,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 603372652.0,
"reward": 0.9337128639221192,
"reward_std": 0.0706071525812149,
"rewards/accuracy_reward": 0.53369140625,
"rewards/brier_reward": 0.8254560589790344,
"rewards/confidence_uniqueness_reward": 0.9495491623878479,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.15936579406261445,
"rewards/frontier_coverage_1": 0.15936579406261445,
"rewards/frontier_coverage_10": 0.15831765085458754,
"rewards/frontier_coverage_15": 0.13857824504375457,
"rewards/frontier_coverage_20": 0.09109884053468705,
"rewards/frontier_coverage_25": 0.06888703480362893,
"rewards/frontier_coverage_5": 0.15928825587034226,
"rewards/frontier_entropy_batch_reward": -0.2395363688468933,
"signal/accuracy_reward/centered_abs_mean": 0.070025634765625,
"signal/accuracy_reward/group_std_mean": 0.09729954451322556,
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7560475468635559,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0350128173828125,
"signal/advantage_abs_mean": 0.7645478129386902,
"signal/advantage_pre_scale_abs_mean": 0.05406961366534233,
"signal/advantage_pre_scale_std": 0.0894312784075737,
"signal/advantage_std": 0.9823664426803589,
"signal/brier_reward/centered_abs_mean": 0.08987597078084945,
"signal/brier_reward/group_std_mean": 0.11730807423591613,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19417197108268738,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.00898759663105011,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013608038239181042,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0172698387876153,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029590404033660887,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00136080386582762,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002120504714548588,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.13798716366291047,
"signal/frontier_coverage_0/group_std_mean": 0.17882009148597716,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04257337599992752,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001973216445185244,
"signal/frontier_coverage_1/centered_abs_mean": 0.13798716366291047,
"signal/frontier_coverage_1/group_std_mean": 0.17882009148597716,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04257337599992752,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001973216445185244,
"signal/frontier_coverage_10/centered_abs_mean": 0.13663693964481355,
"signal/frontier_coverage_10/group_std_mean": 0.17710677087306975,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.042156299203634263,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019539082422852516,
"signal/frontier_coverage_15/centered_abs_mean": 0.11394921988248825,
"signal/frontier_coverage_15/group_std_mean": 0.1480672240257263,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.035157003253698346,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016294738743454218,
"signal/frontier_coverage_20/centered_abs_mean": 0.06810803413391113,
"signal/frontier_coverage_20/group_std_mean": 0.08849272578954696,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021022461354732513,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009739449014887214,
"signal/frontier_coverage_25/centered_abs_mean": 0.05104894489049912,
"signal/frontier_coverage_25/group_std_mean": 0.06541921645402908,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015766990557312965,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007299999007955194,
"signal/frontier_coverage_5/centered_abs_mean": 0.13789215981960296,
"signal/frontier_coverage_5/group_std_mean": 0.17869631946086884,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04254399910569191,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001971857948228717,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27577903866767883,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34956675171852114,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5942099094390869,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027577904239296912,
"step": 180
},
{
"calibration/aurc": 0.2714459675058643,
"calibration/batch_distribution_entropy": 0.9616355905909744,
"calibration/buffer_distribution_entropy": 0.9950926642195649,
"calibration/confidence_entropy": 0.4635280172759992,
"calibration/coverage@0%": 0.073046875,
"calibration/coverage@1%": 0.083203125,
"calibration/coverage@10%": 0.31015625,
"calibration/coverage@15%": 0.38359375,
"calibration/coverage@20%": 0.459375,
"calibration/coverage@25%": 0.551953125,
"calibration/coverage@30%": 0.61015625,
"calibration/coverage@5%": 0.213671875,
"calibration/ece": 0.13643323395430737,
"calibration/mean_confidence": 0.4728889014629608,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 527.0,
"completions/max_terminated_length": 527.0,
"completions/mean_length": 237.69599609375,
"completions/mean_terminated_length": 237.69599609375,
"completions/min_length": 129.4,
"completions/min_terminated_length": 129.4,
"epoch": 0.592,
"grad_norm": 0.016083823516964912,
"learning_rate": 1e-06,
"loss": -0.0008,
"num_tokens": 620974371.0,
"reward": 0.929496419429779,
"reward_std": 0.07398554235696793,
"rewards/accuracy_reward": 0.5287109375,
"rewards/brier_reward": 0.816726803779602,
"rewards/confidence_uniqueness_reward": 0.9510353088378907,
"rewards/format_reward": 1.0,
"rewards/frontier_coverage_0": 0.1491527661681175,
"rewards/frontier_coverage_1": 0.1491527661681175,
"rewards/frontier_coverage_10": 0.1479087233543396,
"rewards/frontier_coverage_15": 0.12765701860189438,
"rewards/frontier_coverage_20": 0.08184282779693604,
"rewards/frontier_coverage_25": 0.06409678980708122,
"rewards/frontier_coverage_5": 0.14910189658403397,
"rewards/frontier_entropy_batch_reward": -0.24060723185539246,
"signal/accuracy_reward/centered_abs_mean": 0.078125,
"signal/accuracy_reward/group_std_mean": 0.10625525563955307,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8084635734558105,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0390625,
"signal/advantage_abs_mean": 0.7757280707359314,
"signal/advantage_pre_scale_abs_mean": 0.05764142274856567,
"signal/advantage_pre_scale_std": 0.09220658987760544,
"signal/advantage_std": 0.9824289321899414,
"signal/brier_reward/centered_abs_mean": 0.09430547803640366,
"signal/brier_reward/group_std_mean": 0.12114822864532471,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.196575266122818,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009430548176169395,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013017821311950683,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016292367503046988,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0272565308958292,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013017821591347456,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_0/centered_abs_mean": 0.148694708943367,
"signal/frontier_coverage_0/group_std_mean": 0.19025928378105164,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04428746402263641,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021263343514874578,
"signal/frontier_coverage_1/centered_abs_mean": 0.148694708943367,
"signal/frontier_coverage_1/group_std_mean": 0.19025928378105164,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04428746402263641,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021263343514874578,
"signal/frontier_coverage_10/centered_abs_mean": 0.14685680270195006,
"signal/frontier_coverage_10/group_std_mean": 0.18790920376777648,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.043737325072288516,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002100052172318101,
"signal/frontier_coverage_15/centered_abs_mean": 0.12006380707025528,
"signal/frontier_coverage_15/group_std_mean": 0.15359488427639006,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03576338440179825,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017169124213978649,
"signal/frontier_coverage_20/centered_abs_mean": 0.0711003676056862,
"signal/frontier_coverage_20/group_std_mean": 0.09024541974067687,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021219252794981002,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010167352622374893,
"signal/frontier_coverage_25/centered_abs_mean": 0.05128743276000023,
"signal/frontier_coverage_25/group_std_mean": 0.06477131098508834,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015384691581130028,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007334102760069072,
"signal/frontier_coverage_5/centered_abs_mean": 0.14860089123249054,
"signal/frontier_coverage_5/group_std_mean": 0.19013891518115997,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04425964131951332,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021249927347525956,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29118287563323975,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36209931373596194,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6089503169059753,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029118288308382034,
"step": 185
},
{
"calibration/aurc": 0.19273776348515756,
"calibration/batch_distribution_entropy": 0.9611420908727529,
"calibration/buffer_distribution_entropy": 0.9947319347562583,
"calibration/confidence_entropy": 0.46235385435086274,
"calibration/coverage@0%": 0.0953125,
"calibration/coverage@1%": 0.103515625,
"calibration/coverage@10%": 0.419921875,
"calibration/coverage@15%": 0.491796875,
"calibration/coverage@20%": 0.5875,
"calibration/coverage@25%": 0.665625,
"calibration/coverage@30%": 0.7375,
"calibration/coverage@5%": 0.235546875,
"calibration/ece": 0.09930096783332423,
"calibration/mean_confidence": 0.4746575482431852,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 708.8,
"completions/max_terminated_length": 708.8,
"completions/mean_length": 255.9431640625,
"completions/mean_terminated_length": 255.99241638183594,
"completions/min_length": 81.2,
"completions/min_terminated_length": 135.0,
"epoch": 0.608,
"grad_norm": 0.007871582172811031,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 638594717.0,
"reward": 0.9367780327796936,
"reward_std": 0.07506957724690437,
"rewards/accuracy_reward": 0.53486328125,
"rewards/brier_reward": 0.8349022507667542,
"rewards/confidence_uniqueness_reward": 0.9513669490814209,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.16537888646125792,
"rewards/frontier_coverage_1": 0.16537888646125792,
"rewards/frontier_coverage_10": 0.16384746134281158,
"rewards/frontier_coverage_15": 0.13749379813671112,
"rewards/frontier_coverage_20": 0.08554163128137589,
"rewards/frontier_coverage_25": 0.07167729437351227,
"rewards/frontier_coverage_5": 0.16531487107276915,
"rewards/frontier_entropy_batch_reward": -0.22834107279777527,
"signal/accuracy_reward/centered_abs_mean": 0.085858154296875,
"signal/accuracy_reward/group_std_mean": 0.11284001320600509,
"signal/accuracy_reward/group_zero_std_frac": 0.68125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9049631834030152,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0429290771484375,
"signal/advantage_abs_mean": 0.7692124009132385,
"signal/advantage_pre_scale_abs_mean": 0.05844322219491005,
"signal/advantage_pre_scale_std": 0.09503102004528045,
"signal/advantage_std": 0.9823786616325378,
"signal/brier_reward/centered_abs_mean": 0.0926524430513382,
"signal/brier_reward/group_std_mean": 0.12010594606399536,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19838889837265014,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.00926524419337511,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012868665717542171,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01655961014330387,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02771872468292713,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001286866539157927,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003949139639735222,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.1529883474111557,
"signal/frontier_coverage_0/group_std_mean": 0.19571956098079682,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04671046063303948,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002187733328901231,
"signal/frontier_coverage_1/centered_abs_mean": 0.1529883474111557,
"signal/frontier_coverage_1/group_std_mean": 0.19571956098079682,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04671046063303948,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002187733328901231,
"signal/frontier_coverage_10/centered_abs_mean": 0.1511484533548355,
"signal/frontier_coverage_10/group_std_mean": 0.19337283372879027,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04615175053477287,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002161422930657864,
"signal/frontier_coverage_15/centered_abs_mean": 0.1199759766459465,
"signal/frontier_coverage_15/group_std_mean": 0.15368208587169646,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.036673346906900404,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017156564630568028,
"signal/frontier_coverage_20/centered_abs_mean": 0.06901527941226959,
"signal/frontier_coverage_20/group_std_mean": 0.08763439208269119,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021173715963959693,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009869184694252908,
"signal/frontier_coverage_25/centered_abs_mean": 0.05233650431036949,
"signal/frontier_coverage_25/group_std_mean": 0.06636691689491273,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016095756366848946,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007484119967557489,
"signal/frontier_coverage_5/centered_abs_mean": 0.15288768708705902,
"signal/frontier_coverage_5/group_std_mean": 0.19559282064437866,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.046679823845624926,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021862938767299054,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28050378561019895,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3514762043952942,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6038827538490296,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0280503798276186,
"step": 190
},
{
"calibration/aurc": 0.20130243987184096,
"calibration/batch_distribution_entropy": 0.9757232849051418,
"calibration/buffer_distribution_entropy": 0.9946261313671636,
"calibration/confidence_entropy": 0.4755939827672188,
"calibration/coverage@0%": 0.050785072162426614,
"calibration/coverage@1%": 0.07148819716242662,
"calibration/coverage@10%": 0.3278650929549902,
"calibration/coverage@15%": 0.4200908145792564,
"calibration/coverage@20%": 0.5170040362035225,
"calibration/coverage@25%": 0.6205632338551859,
"calibration/coverage@30%": 0.7198125611545988,
"calibration/coverage@5%": 0.1879410775440313,
"calibration/ece": 0.10973041707453461,
"calibration/mean_confidence": 0.510571645381397,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 741.8,
"completions/max_terminated_length": 741.8,
"completions/mean_length": 264.2986328125,
"completions/mean_terminated_length": 264.4277404785156,
"completions/min_length": 26.6,
"completions/min_terminated_length": 141.2,
"epoch": 0.624,
"grad_norm": 0.008098253048956394,
"learning_rate": 1e-06,
"loss": -0.0045,
"num_tokens": 656645039.0,
"reward": 0.9362733840942383,
"reward_std": 0.08029964119195938,
"rewards/accuracy_reward": 0.53525390625,
"rewards/brier_reward": 0.8243496537208557,
"rewards/confidence_uniqueness_reward": 0.9524309396743774,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_coverage_0": 0.1459769055247307,
"rewards/frontier_coverage_1": 0.1459769055247307,
"rewards/frontier_coverage_10": 0.1443769782781601,
"rewards/frontier_coverage_15": 0.11724838614463806,
"rewards/frontier_coverage_20": 0.07035666406154632,
"rewards/frontier_coverage_25": 0.06681257486343384,
"rewards/frontier_coverage_5": 0.14591864347457886,
"rewards/frontier_entropy_batch_reward": -0.20702989101409913,
"signal/accuracy_reward/centered_abs_mean": 0.092120361328125,
"signal/accuracy_reward/group_std_mean": 0.11874449849128724,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.922400152683258,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0460601806640625,
"signal/advantage_abs_mean": 0.7747326731681824,
"signal/advantage_pre_scale_abs_mean": 0.06263215094804764,
"signal/advantage_pre_scale_std": 0.10089927017688752,
"signal/advantage_std": 0.9825205326080322,
"signal/brier_reward/centered_abs_mean": 0.10271832048892975,
"signal/brier_reward/group_std_mean": 0.13176209926605226,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20568051934242249,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010271831974387169,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012912808544933796,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01798289269208908,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02583295851945877,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012912808684632181,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_std_mean": 0.0033145629800856113,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011222666688263416,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_coverage_0/centered_abs_mean": 0.15500531494617462,
"signal/frontier_coverage_0/group_std_mean": 0.1970183253288269,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0443998321890831,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022165759466588495,
"signal/frontier_coverage_1/centered_abs_mean": 0.15500531494617462,
"signal/frontier_coverage_1/group_std_mean": 0.1970183253288269,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0443998321890831,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022165759466588495,
"signal/frontier_coverage_10/centered_abs_mean": 0.1526143193244934,
"signal/frontier_coverage_10/group_std_mean": 0.19403861463069916,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.043713013827800754,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021823848597705363,
"signal/frontier_coverage_15/centered_abs_mean": 0.11518861204385758,
"signal/frontier_coverage_15/group_std_mean": 0.14710773229599,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03298960886895656,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016471970826387405,
"signal/frontier_coverage_20/centered_abs_mean": 0.062545096129179,
"signal/frontier_coverage_20/group_std_mean": 0.08005284368991852,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017897342145442963,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008943948661908508,
"signal/frontier_coverage_25/centered_abs_mean": 0.054150203615427016,
"signal/frontier_coverage_25/group_std_mean": 0.0692131370306015,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015493759512901306,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007743479101918638,
"signal/frontier_coverage_5/centered_abs_mean": 0.15490354895591735,
"signal/frontier_coverage_5/group_std_mean": 0.19688799381256103,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.044370852410793304,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002215120755136013,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2715447604656219,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3448778748512268,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5443657517433167,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027154476940631868,
"step": 195
},
{
"calibration/aurc": 0.2305115593686886,
"calibration/batch_distribution_entropy": 0.9769002181990043,
"calibration/buffer_distribution_entropy": 0.994425807362342,
"calibration/confidence_entropy": 0.47991060140927677,
"calibration/coverage@0%": 0.05351868272994129,
"calibration/coverage@1%": 0.0726593077299413,
"calibration/coverage@10%": 0.3296951443248532,
"calibration/coverage@15%": 0.3894607693248532,
"calibration/coverage@20%": 0.4332199425146771,
"calibration/coverage@25%": 0.5414307118395303,
"calibration/coverage@30%": 0.6391083659491195,
"calibration/coverage@5%": 0.2496124327299413,
"calibration/ece": 0.1474371272941966,
"calibration/mean_confidence": 0.5323626348162074,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00068359375,
"completions/max_length": 737.4,
"completions/max_terminated_length": 737.4,
"completions/mean_length": 256.04560546875,
"completions/mean_terminated_length": 256.2203735351562,
"completions/min_length": 54.4,
"completions/min_terminated_length": 136.6,
"epoch": 0.64,
"grad_norm": 0.007591220550239086,
"learning_rate": 1e-06,
"loss": -0.0053,
"num_tokens": 674609634.0,
"reward": 0.958326268196106,
"reward_std": 0.07369578629732132,
"rewards/accuracy_reward": 0.586328125,
"rewards/brier_reward": 0.8264079332351685,
"rewards/confidence_uniqueness_reward": 0.9515935182571411,
"rewards/format_reward": 0.99931640625,
"rewards/frontier_coverage_0": 0.11021586209535598,
"rewards/frontier_coverage_1": 0.11021586209535598,
"rewards/frontier_coverage_10": 0.1090992659330368,
"rewards/frontier_coverage_15": 0.09044042155146599,
"rewards/frontier_coverage_20": 0.0605145275592804,
"rewards/frontier_coverage_25": 0.06968777850270272,
"rewards/frontier_coverage_5": 0.11018936783075332,
"rewards/frontier_entropy_batch_reward": -0.21739307940006256,
"signal/accuracy_reward/centered_abs_mean": 0.07176513671875,
"signal/accuracy_reward/group_std_mean": 0.09551991671323776,
"signal/accuracy_reward/group_zero_std_frac": 0.721875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7490824341773987,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.035882568359375,
"signal/advantage_abs_mean": 0.7772216916084289,
"signal/advantage_pre_scale_abs_mean": 0.057156357914209366,
"signal/advantage_pre_scale_std": 0.09424262046813965,
"signal/advantage_std": 0.9824276566505432,
"signal/brier_reward/centered_abs_mean": 0.09486225694417953,
"signal/brier_reward/group_std_mean": 0.12378767281770706,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19762584567070007,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009486225806176663,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013210498169064522,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01799871101975441,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027587800472974777,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013210498262196779,
"signal/format_reward/centered_abs_mean": 0.001287841796875,
"signal/format_reward/group_std_mean": 0.003135160403326154,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.012948540598154068,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0006439208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.13088381588459014,
"signal/frontier_coverage_0/group_std_mean": 0.17006581425666809,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03911969661712646,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018716384656727314,
"signal/frontier_coverage_1/centered_abs_mean": 0.13088381588459014,
"signal/frontier_coverage_1/group_std_mean": 0.17006581425666809,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03911969661712646,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018716384656727314,
"signal/frontier_coverage_10/centered_abs_mean": 0.12875951677560807,
"signal/frontier_coverage_10/group_std_mean": 0.16733676493167876,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03848160281777382,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001841261121444404,
"signal/frontier_coverage_15/centered_abs_mean": 0.09472260624170303,
"signal/frontier_coverage_15/group_std_mean": 0.12351420521736145,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.028285817056894303,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013545332476496696,
"signal/frontier_coverage_20/centered_abs_mean": 0.054108986258506776,
"signal/frontier_coverage_20/group_std_mean": 0.06981250941753388,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0161893917247653,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000773758499417454,
"signal/frontier_coverage_25/centered_abs_mean": 0.05493494421243668,
"signal/frontier_coverage_25/group_std_mean": 0.0703204944729805,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016481004655361176,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007855696836486459,
"signal/frontier_coverage_5/centered_abs_mean": 0.13078642785549163,
"signal/frontier_coverage_5/group_std_mean": 0.16993974149227142,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03909059762954712,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018702458590269088,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2803233087062836,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3516668021678925,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5867632031440735,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02803233154118061,
"step": 200
},
{
"epoch": 0.64,
"eval_calibration/aurc": 0.4308345787167824,
"eval_calibration/batch_distribution_entropy": 0.9444042163645358,
"eval_calibration/buffer_distribution_entropy": 0.9942015969240219,
"eval_calibration/confidence_entropy": 0.4692993798939642,
"eval_calibration/coverage@0%": 0.0625,
"eval_calibration/coverage@1%": 0.0625,
"eval_calibration/coverage@10%": 0.0625,
"eval_calibration/coverage@15%": 0.09375,
"eval_calibration/coverage@20%": 0.140625,
"eval_calibration/coverage@25%": 0.28125,
"eval_calibration/coverage@30%": 0.3359375,
"eval_calibration/coverage@5%": 0.0625,
"eval_calibration/ece": 0.1523365805242153,
"eval_calibration/mean_confidence": 0.44097538324252694,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 454.0,
"eval_completions/max_terminated_length": 454.0,
"eval_completions/mean_length": 252.9209976196289,
"eval_completions/mean_terminated_length": 252.9209976196289,
"eval_completions/min_length": 151.25,
"eval_completions/min_terminated_length": 151.25,
"eval_loss": 0.0,
"eval_num_tokens": 674609634.0,
"eval_reward": 0.8108291625976562,
"eval_reward_std": 0.22701482847332954,
"eval_rewards/accuracy_reward": 0.451171875,
"eval_rewards/brier_reward": 0.8127871453762054,
"eval_rewards/confidence_uniqueness_reward": 0.894287109375,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_coverage_0": 0.19150054454803467,
"eval_rewards/frontier_coverage_1": 0.19150054454803467,
"eval_rewards/frontier_coverage_10": 0.18698867037892342,
"eval_rewards/frontier_coverage_15": 0.13827473297715187,
"eval_rewards/frontier_coverage_20": 0.06924514845013618,
"eval_rewards/frontier_coverage_25": 0.04760201275348663,
"eval_rewards/frontier_coverage_5": 0.1913793683052063,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 22.9444,
"eval_samples_per_second": 21.792,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4730224609375,
"eval_signal/accuracy_reward/group_std_mean": 0.4935734122991562,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0439613461494446,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23651123046875,
"eval_signal/advantage_abs_mean": 0.9394318014383316,
"eval_signal/advantage_pre_scale_abs_mean": 0.21379049867391586,
"eval_signal/advantage_pre_scale_std": 0.2245732806622982,
"eval_signal/advantage_std": 0.9876820743083954,
"eval_signal/brier_reward/centered_abs_mean": 0.17406537756323814,
"eval_signal/brier_reward/group_std_mean": 0.2270943932235241,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07685280591249466,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.01740653719753027,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0407257080078125,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.049375214613974094,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017996263224631548,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004072570824064314,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.36413776874542236,
"eval_signal/frontier_coverage_0/group_std_mean": 0.432192362844944,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.023011908400803804,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0052071703830733895,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36413776874542236,
"eval_signal/frontier_coverage_1/group_std_mean": 0.432192362844944,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.023011908400803804,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0052071703830733895,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.3524700105190277,
"eval_signal/frontier_coverage_10/group_std_mean": 0.418954998254776,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.022273984737694263,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005040321149863303,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2525060772895813,
"eval_signal/frontier_coverage_15/group_std_mean": 0.30542421340942383,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01595612964592874,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036108369240537286,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10488817654550076,
"eval_signal/frontier_coverage_20/group_std_mean": 0.133793443441391,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006626064772717655,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014999008853919804,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.07727677188813686,
"eval_signal/frontier_coverage_25/group_std_mean": 0.10409998148679733,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004873032798059285,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001105057803215459,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3638596907258034,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4318847507238388,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.022994326427578926,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00520319351926446,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.174,
"step": 200
},
{
"calibration/aurc": 0.41555223691353416,
"calibration/batch_distribution_entropy": 0.9703810471803284,
"calibration/buffer_distribution_entropy": 0.9944921143370096,
"calibration/confidence_entropy": 0.5089773199690188,
"calibration/coverage@0%": 0.00234375,
"calibration/coverage@1%": 0.00234375,
"calibration/coverage@10%": 0.013671875,
"calibration/coverage@15%": 0.026953125,
"calibration/coverage@20%": 0.10625,
"calibration/coverage@25%": 0.233203125,
"calibration/coverage@30%": 0.372265625,
"calibration/coverage@5%": 0.00234375,
"calibration/ece": 0.10794217888237238,
"calibration/mean_confidence": 0.4368189563932054,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 752.4,
"completions/max_terminated_length": 752.4,
"completions/mean_length": 245.75146484375,
"completions/mean_terminated_length": 245.77511901855468,
"completions/min_length": 101.0,
"completions/min_terminated_length": 128.8,
"epoch": 0.656,
"grad_norm": 0.010888704098761082,
"learning_rate": 1e-06,
"loss": 0.0003,
"num_tokens": 691982673.0,
"reward": 0.9192820072174073,
"reward_std": 0.07650423496961593,
"rewards/accuracy_reward": 0.50830078125,
"rewards/brier_reward": 0.7998928785324096,
"rewards/confidence_uniqueness_reward": 0.9526479959487915,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.1341045081615448,
"rewards/frontier_coverage_1": 0.1341045081615448,
"rewards/frontier_coverage_10": 0.13056059628725053,
"rewards/frontier_coverage_15": 0.09860608130693435,
"rewards/frontier_coverage_20": 0.05775674730539322,
"rewards/frontier_coverage_25": 0.052621806412935256,
"rewards/frontier_coverage_5": 0.13404361754655839,
"rewards/frontier_entropy_batch_reward": -0.20681337118148804,
"signal/accuracy_reward/centered_abs_mean": 0.080706787109375,
"signal/accuracy_reward/group_std_mean": 0.11042787879705429,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8209300398826599,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0403533935546875,
"signal/advantage_abs_mean": 0.7633422970771789,
"signal/advantage_pre_scale_abs_mean": 0.05868617594242096,
"signal/advantage_pre_scale_std": 0.09539144784212113,
"signal/advantage_std": 0.9824797153472901,
"signal/brier_reward/centered_abs_mean": 0.10599584430456162,
"signal/brier_reward/group_std_mean": 0.135506734251976,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21632728576660157,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010599584691226483,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011845141276717186,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01510525420308113,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024192561581730842,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011845141649246215,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019330549985170364,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1526343673467636,
"signal/frontier_coverage_0/group_std_mean": 0.19555696845054626,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04452885463833809,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021826714277267457,
"signal/frontier_coverage_1/centered_abs_mean": 0.1526343673467636,
"signal/frontier_coverage_1/group_std_mean": 0.19555696845054626,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04452885463833809,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021826714277267457,
"signal/frontier_coverage_10/centered_abs_mean": 0.14827150702476502,
"signal/frontier_coverage_10/group_std_mean": 0.18989112377166747,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04325413852930069,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002120282547548413,
"signal/frontier_coverage_15/centered_abs_mean": 0.10958008021116257,
"signal/frontier_coverage_15/group_std_mean": 0.14022946059703828,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03196649923920632,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001566995191387832,
"signal/frontier_coverage_20/centered_abs_mean": 0.060041727125644685,
"signal/frontier_coverage_20/group_std_mean": 0.07666357308626175,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017537441104650497,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008585967007093132,
"signal/frontier_coverage_25/centered_abs_mean": 0.052678339183330536,
"signal/frontier_coverage_25/group_std_mean": 0.06774556338787079,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015397872030735015,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007533002295531333,
"signal/frontier_coverage_5/centered_abs_mean": 0.15252876728773118,
"signal/frontier_coverage_5/group_std_mean": 0.19542471468448638,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04449806213378906,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021811614045873285,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2632203996181488,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3332472801208496,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5378780126571655,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02632203996181488,
"step": 205
},
{
"calibration/aurc": 0.27446644817007665,
"calibration/batch_distribution_entropy": 0.9767794518788089,
"calibration/buffer_distribution_entropy": 0.9953144343128398,
"calibration/confidence_entropy": 0.46897802364283125,
"calibration/coverage@0%": 0.021105216487279845,
"calibration/coverage@1%": 0.07462084148727985,
"calibration/coverage@10%": 0.21842129403131114,
"calibration/coverage@15%": 0.24773039995107632,
"calibration/coverage@20%": 0.3294084821428571,
"calibration/coverage@25%": 0.44349697284735806,
"calibration/coverage@30%": 0.518525256849315,
"calibration/coverage@5%": 0.15626146648727984,
"calibration/ece": 0.1646093725597769,
"calibration/mean_confidence": 0.4924234660935678,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 602.2,
"completions/max_terminated_length": 602.2,
"completions/mean_length": 240.45693359375,
"completions/mean_terminated_length": 240.5041259765625,
"completions/min_length": 101.8,
"completions/min_terminated_length": 129.8,
"epoch": 0.672,
"grad_norm": 0.008477822877466679,
"learning_rate": 1e-06,
"loss": 0.0001,
"num_tokens": 709358392.0,
"reward": 0.9301062107086182,
"reward_std": 0.07615272402763366,
"rewards/accuracy_reward": 0.529296875,
"rewards/brier_reward": 0.8106651425361633,
"rewards/confidence_uniqueness_reward": 0.950485908985138,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.1491150051355362,
"rewards/frontier_coverage_1": 0.1491150051355362,
"rewards/frontier_coverage_10": 0.14698787182569503,
"rewards/frontier_coverage_15": 0.11903707012534141,
"rewards/frontier_coverage_20": 0.07292983531951905,
"rewards/frontier_coverage_25": 0.0670611746609211,
"rewards/frontier_coverage_5": 0.1490369975566864,
"rewards/frontier_entropy_batch_reward": -0.2276163637638092,
"signal/accuracy_reward/centered_abs_mean": 0.08685302734375,
"signal/accuracy_reward/group_std_mean": 0.11438945978879929,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9162230610847473,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.043426513671875,
"signal/advantage_abs_mean": 0.7653620719909668,
"signal/advantage_pre_scale_abs_mean": 0.05935278162360191,
"signal/advantage_pre_scale_std": 0.09651183784008026,
"signal/advantage_std": 0.9823976397514343,
"signal/brier_reward/centered_abs_mean": 0.10704858005046844,
"signal/brier_reward/group_std_mean": 0.13854455947875977,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22724690437316894,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010704858414828777,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013841424137353897,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017753540351986886,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029474389180541037,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001384142437018454,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.004257309436798096,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.15937035381793976,
"signal/frontier_coverage_0/group_std_mean": 0.20475198030471803,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04841887578368187,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022789960727095603,
"signal/frontier_coverage_1/centered_abs_mean": 0.15937035381793976,
"signal/frontier_coverage_1/group_std_mean": 0.20475198030471803,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04841887578368187,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022789960727095603,
"signal/frontier_coverage_10/centered_abs_mean": 0.15666671991348266,
"signal/frontier_coverage_10/group_std_mean": 0.20127066373825073,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.047596973925828935,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022403340321034194,
"signal/frontier_coverage_15/centered_abs_mean": 0.11362015008926392,
"signal/frontier_coverage_15/group_std_mean": 0.14626134932041168,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03458226881921291,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016247681109234692,
"signal/frontier_coverage_20/centered_abs_mean": 0.06491810157895088,
"signal/frontier_coverage_20/group_std_mean": 0.08258429169654846,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019771148264408112,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009283288381993771,
"signal/frontier_coverage_25/centered_abs_mean": 0.05772334411740303,
"signal/frontier_coverage_25/group_std_mean": 0.07298219352960586,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017550046369433404,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008254437940195203,
"signal/frontier_coverage_5/centered_abs_mean": 0.1592436820268631,
"signal/frontier_coverage_5/group_std_mean": 0.20459164381027223,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04838085174560547,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002277184650301933,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27080374360084536,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34516674280166626,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5760879635810852,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027080375328660013,
"step": 210
},
{
"calibration/aurc": 0.2857587551102182,
"calibration/batch_distribution_entropy": 0.9700236130811619,
"calibration/buffer_distribution_entropy": 0.9955469989841135,
"calibration/confidence_entropy": 0.5117476351469353,
"calibration/coverage@0%": 0.005859375,
"calibration/coverage@1%": 0.005859375,
"calibration/coverage@10%": 0.157421875,
"calibration/coverage@15%": 0.22735600490196078,
"calibration/coverage@20%": 0.45704656862745097,
"calibration/coverage@25%": 0.5449647671568627,
"calibration/coverage@30%": 0.6262408088235294,
"calibration/coverage@5%": 0.0859375,
"calibration/ece": 0.1445287858422077,
"calibration/mean_confidence": 0.48163763612065813,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 560.4,
"completions/max_terminated_length": 560.4,
"completions/mean_length": 242.11982421875,
"completions/mean_terminated_length": 242.16812744140626,
"completions/min_length": 102.0,
"completions/min_terminated_length": 129.4,
"epoch": 0.688,
"grad_norm": 0.008597953245043755,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 726791619.0,
"reward": 0.9433683037757874,
"reward_std": 0.07582886517047882,
"rewards/accuracy_reward": 0.55654296875,
"rewards/brier_reward": 0.8110973238945007,
"rewards/confidence_uniqueness_reward": 0.9522903680801391,
"rewards/format_reward": 0.99970703125,
"rewards/frontier_coverage_0": 0.12071355283260346,
"rewards/frontier_coverage_1": 0.12071355283260346,
"rewards/frontier_coverage_10": 0.11720674857497215,
"rewards/frontier_coverage_15": 0.08717693164944648,
"rewards/frontier_coverage_20": 0.057222628593444826,
"rewards/frontier_coverage_25": 0.05811392888426781,
"rewards/frontier_coverage_5": 0.12064254283905029,
"rewards/frontier_entropy_batch_reward": -0.20845062732696534,
"signal/accuracy_reward/centered_abs_mean": 0.082000732421875,
"signal/accuracy_reward/group_std_mean": 0.1131775364279747,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8328173041343689,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0410003662109375,
"signal/advantage_abs_mean": 0.7577017068862915,
"signal/advantage_pre_scale_abs_mean": 0.05770004838705063,
"signal/advantage_pre_scale_std": 0.09458618760108947,
"signal/advantage_std": 0.9824642658233642,
"signal/brier_reward/centered_abs_mean": 0.09881015568971634,
"signal/brier_reward/group_std_mean": 0.1283961772918701,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2029067099094391,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009881015866994858,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012166624888777733,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015853742510080336,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025115343555808068,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001216662465594709,
"signal/format_reward/centered_abs_mean": 0.000555419921875,
"signal/format_reward/group_std_mean": 0.0013209730386734009,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.005849538929760456,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375,
"signal/frontier_coverage_0/centered_abs_mean": 0.1484951466321945,
"signal/frontier_coverage_0/group_std_mean": 0.1917928636074066,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04357870742678642,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002123480476438999,
"signal/frontier_coverage_1/centered_abs_mean": 0.1484951466321945,
"signal/frontier_coverage_1/group_std_mean": 0.1917928636074066,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04357870742678642,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002123480476438999,
"signal/frontier_coverage_10/centered_abs_mean": 0.1446128100156784,
"signal/frontier_coverage_10/group_std_mean": 0.18671999275684356,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04244330748915672,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002067963080480695,
"signal/frontier_coverage_15/centered_abs_mean": 0.10260212272405625,
"signal/frontier_coverage_15/group_std_mean": 0.13325890451669692,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030202661454677582,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014672103570774198,
"signal/frontier_coverage_20/centered_abs_mean": 0.058193116635084155,
"signal/frontier_coverage_20/group_std_mean": 0.0752414420247078,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017166363447904585,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008321615867316722,
"signal/frontier_coverage_25/centered_abs_mean": 0.051832232624292374,
"signal/frontier_coverage_25/group_std_mean": 0.06646927148103714,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015283860266208649,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000741200940683484,
"signal/frontier_coverage_5/centered_abs_mean": 0.14837707877159118,
"signal/frontier_coverage_5/group_std_mean": 0.19164316952228547,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0435446061193943,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021217921981588005,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2712122559547424,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3420302629470825,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5576587617397308,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027121226489543914,
"step": 215
},
{
"calibration/aurc": 0.20254961526086906,
"calibration/batch_distribution_entropy": 0.9618709855860214,
"calibration/buffer_distribution_entropy": 0.9959088849568218,
"calibration/confidence_entropy": 0.45951236349176466,
"calibration/coverage@0%": 0.05703125,
"calibration/coverage@1%": 0.069140625,
"calibration/coverage@10%": 0.300390625,
"calibration/coverage@15%": 0.384375,
"calibration/coverage@20%": 0.552734375,
"calibration/coverage@25%": 0.644921875,
"calibration/coverage@30%": 0.731640625,
"calibration/coverage@5%": 0.203125,
"calibration/ece": 0.09126460030129198,
"calibration/mean_confidence": 0.5245861508927346,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 526.0,
"completions/max_terminated_length": 526.0,
"completions/mean_length": 237.3544921875,
"completions/mean_terminated_length": 237.3544921875,
"completions/min_length": 128.4,
"completions/min_terminated_length": 128.4,
"epoch": 0.704,
"grad_norm": 0.007751199882477522,
"learning_rate": 1e-06,
"loss": -0.002,
"num_tokens": 744088273.0,
"reward": 0.942232632637024,
"reward_std": 0.07207171618938446,
"rewards/accuracy_reward": 0.54990234375,
"rewards/brier_reward": 0.8264536499977112,
"rewards/confidence_uniqueness_reward": 0.9524115085601806,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.1335995927453041,
"rewards/frontier_coverage_1": 0.1335995927453041,
"rewards/frontier_coverage_10": 0.130966417491436,
"rewards/frontier_coverage_15": 0.09904391467571258,
"rewards/frontier_coverage_20": 0.06477966532111168,
"rewards/frontier_coverage_25": 0.06846426129341125,
"rewards/frontier_coverage_5": 0.13356347233057023,
"rewards/frontier_entropy_batch_reward": -0.21481671035289765,
"signal/accuracy_reward/centered_abs_mean": 0.070770263671875,
"signal/accuracy_reward/group_std_mean": 0.09581695944070816,
"signal/accuracy_reward/group_zero_std_frac": 0.7125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7298851132392883,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0353851318359375,
"signal/advantage_abs_mean": 0.7701761364936829,
"signal/advantage_pre_scale_abs_mean": 0.055718979239463805,
"signal/advantage_pre_scale_std": 0.09126545041799546,
"signal/advantage_std": 0.9824565052986145,
"signal/brier_reward/centered_abs_mean": 0.0969886377453804,
"signal/brier_reward/group_std_mean": 0.12564177215099334,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2006084829568863,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009698864258825778,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011934367194771766,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015236479230225086,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024650559201836585,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011934367474168539,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0020299691706895827,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1355642005801201,
"signal/frontier_coverage_0/group_std_mean": 0.1746540993452072,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04006961435079574,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019385680090636015,
"signal/frontier_coverage_1/centered_abs_mean": 0.1355642005801201,
"signal/frontier_coverage_1/group_std_mean": 0.1746540993452072,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04006961435079574,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019385680090636015,
"signal/frontier_coverage_10/centered_abs_mean": 0.13053236603736879,
"signal/frontier_coverage_10/group_std_mean": 0.16829843819141388,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03858681917190552,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001866612839512527,
"signal/frontier_coverage_15/centered_abs_mean": 0.09063916206359864,
"signal/frontier_coverage_15/group_std_mean": 0.11711875647306443,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026824329048395157,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012961399741470813,
"signal/frontier_coverage_20/centered_abs_mean": 0.05672153383493424,
"signal/frontier_coverage_20/group_std_mean": 0.07227804362773896,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01677936241030693,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000811117934063077,
"signal/frontier_coverage_25/centered_abs_mean": 0.05697656720876694,
"signal/frontier_coverage_25/group_std_mean": 0.07198350727558137,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016841649636626245,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008147649001330137,
"signal/frontier_coverage_5/centered_abs_mean": 0.13546179682016374,
"signal/frontier_coverage_5/group_std_mean": 0.1745290517807007,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.040039440244436265,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019371037138625979,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2767252385616302,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.351086288690567,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5704930663108826,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027672524750232696,
"step": 220
},
{
"calibration/aurc": 0.23497755005648022,
"calibration/batch_distribution_entropy": 0.9809308019215919,
"calibration/buffer_distribution_entropy": 0.9957884730343161,
"calibration/confidence_entropy": 0.48714494304938716,
"calibration/coverage@0%": 0.03984375,
"calibration/coverage@1%": 0.03984375,
"calibration/coverage@10%": 0.21796875,
"calibration/coverage@15%": 0.287109375,
"calibration/coverage@20%": 0.480078125,
"calibration/coverage@25%": 0.612109375,
"calibration/coverage@30%": 0.7046875,
"calibration/coverage@5%": 0.166015625,
"calibration/ece": 0.14858386498676532,
"calibration/mean_confidence": 0.4596200044797,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 515.0,
"completions/max_terminated_length": 515.0,
"completions/mean_length": 240.1232421875,
"completions/mean_terminated_length": 240.14661254882813,
"completions/min_length": 103.8,
"completions/min_terminated_length": 128.0,
"epoch": 0.72,
"grad_norm": 0.009818264283239841,
"learning_rate": 1e-06,
"loss": 0.003,
"num_tokens": 761556991.0,
"reward": 0.9482641577720642,
"reward_std": 0.07391180843114853,
"rewards/accuracy_reward": 0.56865234375,
"rewards/brier_reward": 0.8173715233802795,
"rewards/confidence_uniqueness_reward": 0.9519086122512818,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.11691680550575256,
"rewards/frontier_coverage_1": 0.11691680550575256,
"rewards/frontier_coverage_10": 0.11170322000980377,
"rewards/frontier_coverage_15": 0.08474379926919937,
"rewards/frontier_coverage_20": 0.05883038938045502,
"rewards/frontier_coverage_25": 0.06428172141313553,
"rewards/frontier_coverage_5": 0.11694350391626358,
"rewards/frontier_entropy_batch_reward": -0.2252698600292206,
"signal/accuracy_reward/centered_abs_mean": 0.079315185546875,
"signal/accuracy_reward/group_std_mean": 0.10617940872907639,
"signal/accuracy_reward/group_zero_std_frac": 0.6875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8031948566436767,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0396575927734375,
"signal/advantage_abs_mean": 0.76589115858078,
"signal/advantage_pre_scale_abs_mean": 0.057143434882164,
"signal/advantage_pre_scale_std": 0.09222103357315063,
"signal/advantage_std": 0.9824912071228027,
"signal/brier_reward/centered_abs_mean": 0.09368100613355637,
"signal/brier_reward/group_std_mean": 0.12197220623493195,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1903090626001358,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009368100762367248,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012216830253601074,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015557673759758472,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024774272739887238,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012216830858960749,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002020454406738281,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.14588095843791962,
"signal/frontier_coverage_0/group_std_mean": 0.1874998241662979,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.042351162433624266,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020860977238044143,
"signal/frontier_coverage_1/centered_abs_mean": 0.14588095843791962,
"signal/frontier_coverage_1/group_std_mean": 0.1874998241662979,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.042351162433624266,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020860977238044143,
"signal/frontier_coverage_10/centered_abs_mean": 0.13653019070625305,
"signal/frontier_coverage_10/group_std_mean": 0.1753913700580597,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03963543772697449,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019523817114531993,
"signal/frontier_coverage_15/centered_abs_mean": 0.09215132296085357,
"signal/frontier_coverage_15/group_std_mean": 0.11820129603147507,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026709938794374465,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013177639339119196,
"signal/frontier_coverage_20/centered_abs_mean": 0.0542985163629055,
"signal/frontier_coverage_20/group_std_mean": 0.06933169215917587,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015776522643864154,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007764688110910356,
"signal/frontier_coverage_25/centered_abs_mean": 0.05071000531315804,
"signal/frontier_coverage_25/group_std_mean": 0.06490004062652588,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014756758883595466,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007251530652865768,
"signal/frontier_coverage_5/centered_abs_mean": 0.14580158591270448,
"signal/frontier_coverage_5/group_std_mean": 0.18739546239376068,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04232985600829124,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020849626045674084,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28452731370925904,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3587178647518158,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5762077331542969,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02845273055136204,
"step": 225
},
{
"calibration/aurc": 0.20461066553526025,
"calibration/batch_distribution_entropy": 0.9639326995442499,
"calibration/buffer_distribution_entropy": 0.996154304026672,
"calibration/confidence_entropy": 0.4545212953812185,
"calibration/coverage@0%": 0.02265625,
"calibration/coverage@1%": 0.02265625,
"calibration/coverage@10%": 0.14296875,
"calibration/coverage@15%": 0.320703125,
"calibration/coverage@20%": 0.5828125,
"calibration/coverage@25%": 0.7171875,
"calibration/coverage@30%": 0.837109375,
"calibration/coverage@5%": 0.083984375,
"calibration/ece": 0.1254218466088354,
"calibration/mean_confidence": 0.5545885902527345,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 636.4,
"completions/max_terminated_length": 636.4,
"completions/mean_length": 249.5712890625,
"completions/mean_terminated_length": 249.5712890625,
"completions/min_length": 138.6,
"completions/min_terminated_length": 138.6,
"epoch": 0.736,
"grad_norm": 0.011151596903800964,
"learning_rate": 1e-06,
"loss": 0.0023,
"num_tokens": 779052185.0,
"reward": 0.9520087242126465,
"reward_std": 0.07401313185691834,
"rewards/accuracy_reward": 0.5802734375,
"rewards/brier_reward": 0.8151456475257873,
"rewards/confidence_uniqueness_reward": 0.9502784729003906,
"rewards/format_reward": 1.0,
"rewards/frontier_coverage_0": 0.11596761420369148,
"rewards/frontier_coverage_1": 0.11596761420369148,
"rewards/frontier_coverage_10": 0.11190555989742279,
"rewards/frontier_coverage_15": 0.08655463755130768,
"rewards/frontier_coverage_20": 0.0668842189013958,
"rewards/frontier_coverage_25": 0.07651213482022286,
"rewards/frontier_coverage_5": 0.11595459505915642,
"rewards/frontier_entropy_batch_reward": -0.24533769488334656,
"signal/accuracy_reward/centered_abs_mean": 0.076953125,
"signal/accuracy_reward/group_std_mean": 0.10131202191114426,
"signal/accuracy_reward/group_zero_std_frac": 0.709375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.805925703048706,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0384765625,
"signal/advantage_abs_mean": 0.7833456873893738,
"signal/advantage_pre_scale_abs_mean": 0.0583541601896286,
"signal/advantage_pre_scale_std": 0.09414290338754654,
"signal/advantage_std": 0.9824194788932801,
"signal/brier_reward/centered_abs_mean": 0.10101247578859329,
"signal/brier_reward/group_std_mean": 0.12959804385900497,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21122341156005858,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010101247392594815,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013729357719421386,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017146859876811506,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028852657228708268,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013729357859119772,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_0/centered_abs_mean": 0.1417178988456726,
"signal/frontier_coverage_0/group_std_mean": 0.18116957545280457,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04281155541539192,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002026565885171294,
"signal/frontier_coverage_1/centered_abs_mean": 0.1417178988456726,
"signal/frontier_coverage_1/group_std_mean": 0.18116957545280457,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04281155541539192,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002026565885171294,
"signal/frontier_coverage_10/centered_abs_mean": 0.13176652491092683,
"signal/frontier_coverage_10/group_std_mean": 0.1686902552843094,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039776308834552764,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018842613324522972,
"signal/frontier_coverage_15/centered_abs_mean": 0.08698353171348572,
"signal/frontier_coverage_15/group_std_mean": 0.11125607341527939,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026212720572948454,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012438645120710135,
"signal/frontier_coverage_20/centered_abs_mean": 0.05746523961424828,
"signal/frontier_coverage_20/group_std_mean": 0.07287896871566772,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017264867946505547,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008217529277317226,
"signal/frontier_coverage_25/centered_abs_mean": 0.058684618771076204,
"signal/frontier_coverage_25/group_std_mean": 0.07420662641525269,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017552951723337172,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008391900104470551,
"signal/frontier_coverage_5/centered_abs_mean": 0.14159742891788482,
"signal/frontier_coverage_5/group_std_mean": 0.18101400136947632,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04277472048997879,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020248432643711566,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2850883662700653,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35867486596107484,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5991427898406982,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0285088375210762,
"step": 230
},
{
"calibration/aurc": 0.25544721535261844,
"calibration/batch_distribution_entropy": 0.9757735884606185,
"calibration/buffer_distribution_entropy": 0.996224589318703,
"calibration/confidence_entropy": 0.46901248682433605,
"calibration/coverage@0%": 0.018359375,
"calibration/coverage@1%": 0.018359375,
"calibration/coverage@10%": 0.16015625,
"calibration/coverage@15%": 0.334375,
"calibration/coverage@20%": 0.468359375,
"calibration/coverage@25%": 0.575390625,
"calibration/coverage@30%": 0.654296875,
"calibration/coverage@5%": 0.0671875,
"calibration/ece": 0.11607450062335481,
"calibration/mean_confidence": 0.48190249004650265,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 610.6,
"completions/max_terminated_length": 610.6,
"completions/mean_length": 242.105859375,
"completions/mean_terminated_length": 242.105859375,
"completions/min_length": 134.6,
"completions/min_terminated_length": 134.6,
"epoch": 0.752,
"grad_norm": 0.013876860029995441,
"learning_rate": 1e-06,
"loss": -0.002,
"num_tokens": 796758549.0,
"reward": 0.9462219715118408,
"reward_std": 0.07667578011751175,
"rewards/accuracy_reward": 0.56220703125,
"rewards/brier_reward": 0.8160680532455444,
"rewards/confidence_uniqueness_reward": 0.951573121547699,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.1251913160085678,
"rewards/frontier_coverage_1": 0.1251913160085678,
"rewards/frontier_coverage_10": 0.1213041938841343,
"rewards/frontier_coverage_15": 0.08119001314043998,
"rewards/frontier_coverage_20": 0.059100668504834176,
"rewards/frontier_coverage_25": 0.06714674010872841,
"rewards/frontier_coverage_5": 0.1251828819513321,
"rewards/frontier_entropy_batch_reward": -0.2166842043399811,
"signal/accuracy_reward/centered_abs_mean": 0.078961181640625,
"signal/accuracy_reward/group_std_mean": 0.10518565624952317,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8214090228080749,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0394805908203125,
"signal/advantage_abs_mean": 0.7728389739990235,
"signal/advantage_pre_scale_abs_mean": 0.059433307498693466,
"signal/advantage_pre_scale_std": 0.09831408560276031,
"signal/advantage_std": 0.9824405074119568,
"signal/brier_reward/centered_abs_mean": 0.09824747443199158,
"signal/brier_reward/group_std_mean": 0.12751171737909317,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20607516467571257,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009824748151004314,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012649696692824364,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016062306985259057,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02607582099735737,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012649696320295334,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.001700025051832199,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.13381745666265488,
"signal/frontier_coverage_0/group_std_mean": 0.1727729856967926,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.040107411891222,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019135896116495132,
"signal/frontier_coverage_1/centered_abs_mean": 0.13381745666265488,
"signal/frontier_coverage_1/group_std_mean": 0.1727729856967926,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.040107411891222,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019135896116495132,
"signal/frontier_coverage_10/centered_abs_mean": 0.12753710001707078,
"signal/frontier_coverage_10/group_std_mean": 0.16487825214862822,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0382267102599144,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001823780476115644,
"signal/frontier_coverage_15/centered_abs_mean": 0.08149610757827759,
"signal/frontier_coverage_15/group_std_mean": 0.10600130110979081,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024482429772615433,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011653943452984094,
"signal/frontier_coverage_20/centered_abs_mean": 0.05388506054878235,
"signal/frontier_coverage_20/group_std_mean": 0.06884423345327377,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01612280998378992,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000770556356292218,
"signal/frontier_coverage_25/centered_abs_mean": 0.0581989660859108,
"signal/frontier_coverage_25/group_std_mean": 0.07418040782213212,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017351610027253626,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008322452078573406,
"signal/frontier_coverage_5/centered_abs_mean": 0.1337724283337593,
"signal/frontier_coverage_5/group_std_mean": 0.17271509468555452,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04009381532669067,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019129457185044884,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2703861802816391,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3440077781677246,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5595187842845917,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02703861817717552,
"step": 235
},
{
"calibration/aurc": 0.21897409183336064,
"calibration/batch_distribution_entropy": 0.9783713011332612,
"calibration/buffer_distribution_entropy": 0.9962282854215292,
"calibration/confidence_entropy": 0.5118263385716734,
"calibration/coverage@0%": 0.13413879036203522,
"calibration/coverage@1%": 0.18923449730919764,
"calibration/coverage@10%": 0.32562912793542076,
"calibration/coverage@15%": 0.39518025318003913,
"calibration/coverage@20%": 0.4823186766144814,
"calibration/coverage@25%": 0.5889914077788649,
"calibration/coverage@30%": 0.6902007399706458,
"calibration/coverage@5%": 0.255272290851272,
"calibration/ece": 0.14865977206824074,
"calibration/mean_confidence": 0.48145361429484357,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 559.4,
"completions/max_terminated_length": 559.4,
"completions/mean_length": 248.37353515625,
"completions/mean_terminated_length": 248.39793090820314,
"completions/min_length": 100.8,
"completions/min_terminated_length": 127.6,
"epoch": 0.768,
"grad_norm": 0.008506418205797672,
"learning_rate": 1e-06,
"loss": 0.002,
"num_tokens": 814234598.0,
"reward": 0.9303049683570862,
"reward_std": 0.0718627542257309,
"rewards/accuracy_reward": 0.5232421875,
"rewards/brier_reward": 0.8212167739868164,
"rewards/confidence_uniqueness_reward": 0.9525408267974853,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.14440589100122453,
"rewards/frontier_coverage_1": 0.14440589100122453,
"rewards/frontier_coverage_10": 0.14087323248386383,
"rewards/frontier_coverage_15": 0.09991578608751298,
"rewards/frontier_coverage_20": 0.05962400585412979,
"rewards/frontier_coverage_25": 0.057767481356859204,
"rewards/frontier_coverage_5": 0.14436377733945846,
"rewards/frontier_entropy_batch_reward": -0.19959425628185273,
"signal/accuracy_reward/centered_abs_mean": 0.069140625,
"signal/accuracy_reward/group_std_mean": 0.09864080995321274,
"signal/accuracy_reward/group_zero_std_frac": 0.696875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7402550339698791,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0345703125,
"signal/advantage_abs_mean": 0.7564525723457336,
"signal/advantage_pre_scale_abs_mean": 0.05381500422954559,
"signal/advantage_pre_scale_std": 0.09075729846954346,
"signal/advantage_std": 0.9823861718177795,
"signal/brier_reward/centered_abs_mean": 0.09333090484142303,
"signal/brier_reward/group_std_mean": 0.12142147421836853,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19983896911144255,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009333090484142303,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01187050472944975,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01518601570278406,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025268430635333062,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011870504822582006,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019358094781637193,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1401743471622467,
"signal/frontier_coverage_0/group_std_mean": 0.182624551653862,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04287303537130356,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020044931676238774,
"signal/frontier_coverage_1/centered_abs_mean": 0.1401743471622467,
"signal/frontier_coverage_1/group_std_mean": 0.182624551653862,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04287303537130356,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020044931676238774,
"signal/frontier_coverage_10/centered_abs_mean": 0.13549837470054626,
"signal/frontier_coverage_10/group_std_mean": 0.1764853775501251,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04146175310015678,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019376267679035663,
"signal/frontier_coverage_15/centered_abs_mean": 0.08775650560855866,
"signal/frontier_coverage_15/group_std_mean": 0.1143747478723526,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02687869928777218,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012549180304631592,
"signal/frontier_coverage_20/centered_abs_mean": 0.05184945985674858,
"signal/frontier_coverage_20/group_std_mean": 0.06713635325431824,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01587141491472721,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007414473104290665,
"signal/frontier_coverage_25/centered_abs_mean": 0.04992828816175461,
"signal/frontier_coverage_25/group_std_mean": 0.06476413011550904,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015272756479680539,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007139745284803212,
"signal/frontier_coverage_5/centered_abs_mean": 0.14012694954872132,
"signal/frontier_coverage_5/group_std_mean": 0.18256248235702516,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04285847619175911,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002003815281204879,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2622542232275009,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3334097921848297,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5582247734069824,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026225423067808153,
"step": 240
},
{
"calibration/aurc": 0.28529336333921895,
"calibration/batch_distribution_entropy": 0.9747004335667775,
"calibration/buffer_distribution_entropy": 0.9962397116658449,
"calibration/confidence_entropy": 0.4703582602948983,
"calibration/coverage@0%": 0.014453125,
"calibration/coverage@1%": 0.064453125,
"calibration/coverage@10%": 0.233203125,
"calibration/coverage@15%": 0.277734375,
"calibration/coverage@20%": 0.3390625,
"calibration/coverage@25%": 0.440234375,
"calibration/coverage@30%": 0.570703125,
"calibration/coverage@5%": 0.1765625,
"calibration/ece": 0.15734434185574941,
"calibration/mean_confidence": 0.5264813330703859,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 519.2,
"completions/max_terminated_length": 519.2,
"completions/mean_length": 245.15859375,
"completions/mean_terminated_length": 245.18346557617187,
"completions/min_length": 108.4,
"completions/min_terminated_length": 135.8,
"epoch": 0.784,
"grad_norm": 0.00773618882521987,
"learning_rate": 1e-06,
"loss": 0.0039,
"num_tokens": 831919390.0,
"reward": 0.9443397045135498,
"reward_std": 0.07677196860313415,
"rewards/accuracy_reward": 0.56416015625,
"rewards/brier_reward": 0.7939793109893799,
"rewards/confidence_uniqueness_reward": 0.952772068977356,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.09836947172880173,
"rewards/frontier_coverage_1": 0.09836947172880173,
"rewards/frontier_coverage_10": 0.0951840840280056,
"rewards/frontier_coverage_15": 0.07212830930948258,
"rewards/frontier_coverage_20": 0.05310492143034935,
"rewards/frontier_coverage_25": 0.06044500917196274,
"rewards/frontier_coverage_5": 0.09833909720182418,
"rewards/frontier_entropy_batch_reward": -0.2060262978076935,
"signal/accuracy_reward/centered_abs_mean": 0.083209228515625,
"signal/accuracy_reward/group_std_mean": 0.11050355583429336,
"signal/accuracy_reward/group_zero_std_frac": 0.678125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8190797328948974,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0416046142578125,
"signal/advantage_abs_mean": 0.7678187370300293,
"signal/advantage_pre_scale_abs_mean": 0.05949816852807999,
"signal/advantage_pre_scale_std": 0.09607118517160415,
"signal/advantage_std": 0.982481837272644,
"signal/brier_reward/centered_abs_mean": 0.10488737523555755,
"signal/brier_reward/group_std_mean": 0.134664386510849,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21070242822170257,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010488738119602204,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01203925535082817,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015293254517018795,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024537032842636107,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012039255816489458,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021954655647277834,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.14875861406326293,
"signal/frontier_coverage_0/group_std_mean": 0.18895536065101623,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04271491318941116,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002127248211763799,
"signal/frontier_coverage_1/centered_abs_mean": 0.14875861406326293,
"signal/frontier_coverage_1/group_std_mean": 0.18895536065101623,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04271491318941116,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002127248211763799,
"signal/frontier_coverage_10/centered_abs_mean": 0.14137446880340576,
"signal/frontier_coverage_10/group_std_mean": 0.17956486344337463,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04061263874173164,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020216548582538962,
"signal/frontier_coverage_15/centered_abs_mean": 0.08850065022706985,
"signal/frontier_coverage_15/group_std_mean": 0.11260762810707092,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025534508749842644,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012655592989176511,
"signal/frontier_coverage_20/centered_abs_mean": 0.0558601513504982,
"signal/frontier_coverage_20/group_std_mean": 0.0709412157535553,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01617070809006691,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007988001452758908,
"signal/frontier_coverage_25/centered_abs_mean": 0.056756097823381424,
"signal/frontier_coverage_25/group_std_mean": 0.07209322452545167,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01643000766634941,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008116121869534254,
"signal/frontier_coverage_5/centered_abs_mean": 0.14871807992458344,
"signal/frontier_coverage_5/group_std_mean": 0.1889048457145691,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.042703104019165036,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021266685565933586,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2699484646320343,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3443294107913971,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5487543344497681,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026994846761226654,
"step": 245
},
{
"calibration/aurc": 0.19084901152374725,
"calibration/batch_distribution_entropy": 0.9701281507957082,
"calibration/buffer_distribution_entropy": 0.9964064744035049,
"calibration/confidence_entropy": 0.46336307930016296,
"calibration/coverage@0%": 0.033203125,
"calibration/coverage@1%": 0.033203125,
"calibration/coverage@10%": 0.47109375,
"calibration/coverage@15%": 0.54453125,
"calibration/coverage@20%": 0.6015625,
"calibration/coverage@25%": 0.66328125,
"calibration/coverage@30%": 0.724609375,
"calibration/coverage@5%": 0.2921875,
"calibration/ece": 0.1200733853568633,
"calibration/mean_confidence": 0.47809412117138994,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 538.0,
"completions/max_terminated_length": 538.0,
"completions/mean_length": 229.1568359375,
"completions/mean_terminated_length": 229.1568359375,
"completions/min_length": 122.8,
"completions/min_terminated_length": 122.8,
"epoch": 0.8,
"grad_norm": 0.00832080002874136,
"learning_rate": 1e-06,
"loss": 0.0015,
"num_tokens": 849276516.0,
"reward": 0.9613359928131103,
"reward_std": 0.07218454480171203,
"rewards/accuracy_reward": 0.60205078125,
"rewards/brier_reward": 0.8215224504470825,
"rewards/confidence_uniqueness_reward": 0.9497822642326355,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.10563646852970124,
"rewards/frontier_coverage_1": 0.10563646852970124,
"rewards/frontier_coverage_10": 0.1023257091641426,
"rewards/frontier_coverage_15": 0.07141236141324044,
"rewards/frontier_coverage_20": 0.059184766560792926,
"rewards/frontier_coverage_25": 0.07559897229075432,
"rewards/frontier_coverage_5": 0.10563607960939407,
"rewards/frontier_entropy_batch_reward": -0.2571470856666565,
"signal/accuracy_reward/centered_abs_mean": 0.070184326171875,
"signal/accuracy_reward/group_std_mean": 0.09646072834730149,
"signal/accuracy_reward/group_zero_std_frac": 0.7125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7383540868759155,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0350921630859375,
"signal/advantage_abs_mean": 0.773518168926239,
"signal/advantage_pre_scale_abs_mean": 0.05557697787880898,
"signal/advantage_pre_scale_std": 0.09160784184932709,
"signal/advantage_std": 0.9823978185653687,
"signal/brier_reward/centered_abs_mean": 0.09445693641901017,
"signal/brier_reward/group_std_mean": 0.12324505150318146,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19952306747436524,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009445693716406823,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013652579858899117,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017295270599424838,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029012787342071533,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013652580324560403,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0020907722413539887,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.13039756417274476,
"signal/frontier_coverage_0/group_std_mean": 0.1706514060497284,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039599084109067914,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018646851414814592,
"signal/frontier_coverage_1/centered_abs_mean": 0.13039756417274476,
"signal/frontier_coverage_1/group_std_mean": 0.1706514060497284,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039599084109067914,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018646851414814592,
"signal/frontier_coverage_10/centered_abs_mean": 0.12471685260534286,
"signal/frontier_coverage_10/group_std_mean": 0.163266384601593,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03788367658853531,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017834509257227183,
"signal/frontier_coverage_15/centered_abs_mean": 0.0766264021396637,
"signal/frontier_coverage_15/group_std_mean": 0.10058980733156205,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.023319342732429506,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010957575403153896,
"signal/frontier_coverage_20/centered_abs_mean": 0.0527132585644722,
"signal/frontier_coverage_20/group_std_mean": 0.06767940372228623,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01602230276912451,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000753799604717642,
"signal/frontier_coverage_25/centered_abs_mean": 0.05887153521180153,
"signal/frontier_coverage_25/group_std_mean": 0.07468887120485306,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0178463090211153,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008418629644438624,
"signal/frontier_coverage_5/centered_abs_mean": 0.1303926795721054,
"signal/frontier_coverage_5/group_std_mean": 0.17064524292945862,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03959760367870331,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001864615362137556,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29742798805236814,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36837912201881406,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6314390063285827,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029742800071835516,
"step": 250
},
{
"epoch": 0.8,
"eval_calibration/aurc": 0.4398142182921856,
"eval_calibration/batch_distribution_entropy": 0.8782582909606457,
"eval_calibration/buffer_distribution_entropy": 0.9965439859027538,
"eval_calibration/confidence_entropy": 0.4680754710266846,
"eval_calibration/coverage@0%": 0.03125,
"eval_calibration/coverage@1%": 0.03125,
"eval_calibration/coverage@10%": 0.09375,
"eval_calibration/coverage@15%": 0.109375,
"eval_calibration/coverage@20%": 0.1171875,
"eval_calibration/coverage@25%": 0.2890625,
"eval_calibration/coverage@30%": 0.3671875,
"eval_calibration/coverage@5%": 0.03125,
"eval_calibration/ece": 0.19118800764913552,
"eval_calibration/mean_confidence": 0.4170219948301839,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 350.75,
"eval_completions/max_terminated_length": 350.75,
"eval_completions/mean_length": 220.90200805664062,
"eval_completions/mean_terminated_length": 220.90200805664062,
"eval_completions/min_length": 138.25,
"eval_completions/min_terminated_length": 138.25,
"eval_loss": 0.0,
"eval_num_tokens": 849276516.0,
"eval_reward": 0.8079598397016525,
"eval_reward_std": 0.22688810154795647,
"eval_rewards/accuracy_reward": 0.4453125,
"eval_rewards/brier_reward": 0.8076187521219254,
"eval_rewards/confidence_uniqueness_reward": 0.89892578125,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_coverage_0": 0.20175327360630035,
"eval_rewards/frontier_coverage_1": 0.20175327360630035,
"eval_rewards/frontier_coverage_10": 0.19296763092279434,
"eval_rewards/frontier_coverage_15": 0.11704839393496513,
"eval_rewards/frontier_coverage_20": 0.06113920174539089,
"eval_rewards/frontier_coverage_25": 0.04800493270158768,
"eval_rewards/frontier_coverage_5": 0.2017485909163952,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 19.6093,
"eval_samples_per_second": 25.498,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4775390625,
"eval_signal/accuracy_reward/group_std_mean": 0.49622878432273865,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0528854429721832,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23876953125,
"eval_signal/advantage_abs_mean": 0.9418710172176361,
"eval_signal/advantage_pre_scale_abs_mean": 0.2140355482697487,
"eval_signal/advantage_pre_scale_std": 0.22431568056344986,
"eval_signal/advantage_std": 0.9876823276281357,
"eval_signal/brier_reward/centered_abs_mean": 0.1818385049700737,
"eval_signal/brier_reward/group_std_mean": 0.23753474280238152,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08033054694533348,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018183850217610598,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.040863037109375,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04884030018001795,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01809265185147524,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004086303699295968,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.38850048184394836,
"eval_signal/frontier_coverage_0/group_std_mean": 0.46720458567142487,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02454289235174656,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005555556854233146,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.38850048184394836,
"eval_signal/frontier_coverage_1/group_std_mean": 0.46720458567142487,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02454289235174656,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005555556854233146,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.37041959911584854,
"eval_signal/frontier_coverage_10/group_std_mean": 0.44689878821372986,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02340186294168234,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005297000170685351,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.21324453875422478,
"eval_signal/frontier_coverage_15/group_std_mean": 0.26560650020837784,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013475762913003564,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030493969097733498,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.0958902109414339,
"eval_signal/frontier_coverage_20/group_std_mean": 0.12255750596523285,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006055898265913129,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013712299696635455,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.0840714368969202,
"eval_signal/frontier_coverage_25/group_std_mean": 0.11348609812557697,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0053028815891593695,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001202221552375704,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3884858936071396,
"eval_signal/frontier_coverage_5/group_std_mean": 0.4671877399086952,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.024541971273720264,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00555534812156111,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.204,
"step": 250
},
{
"calibration/aurc": 0.2088603333490008,
"calibration/batch_distribution_entropy": 0.9478072553473524,
"calibration/buffer_distribution_entropy": 0.9963203098504536,
"calibration/confidence_entropy": 0.4611221206641368,
"calibration/coverage@0%": 0.03203125,
"calibration/coverage@1%": 0.03203125,
"calibration/coverage@10%": 0.235546875,
"calibration/coverage@15%": 0.3703125,
"calibration/coverage@20%": 0.575,
"calibration/coverage@25%": 0.6890625,
"calibration/coverage@30%": 0.8125,
"calibration/coverage@5%": 0.114453125,
"calibration/ece": 0.15271014624473428,
"calibration/mean_confidence": 0.4928081139273847,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 408.6,
"completions/max_terminated_length": 408.6,
"completions/mean_length": 214.7255859375,
"completions/mean_terminated_length": 214.7255859375,
"completions/min_length": 116.6,
"completions/min_terminated_length": 116.6,
"epoch": 0.816,
"grad_norm": 0.013140806928277016,
"learning_rate": 1e-06,
"loss": 0.0024,
"num_tokens": 866574474.0,
"reward": 0.9542932391166687,
"reward_std": 0.07651791870594024,
"rewards/accuracy_reward": 0.5873046875,
"rewards/brier_reward": 0.8041110873222351,
"rewards/confidence_uniqueness_reward": 0.9512474060058593,
"rewards/format_reward": 1.0,
"rewards/frontier_coverage_0": 0.09355135262012482,
"rewards/frontier_coverage_1": 0.09355135262012482,
"rewards/frontier_coverage_10": 0.09087654128670693,
"rewards/frontier_coverage_15": 0.06994750797748565,
"rewards/frontier_coverage_20": 0.05408108681440353,
"rewards/frontier_coverage_25": 0.06337937340140343,
"rewards/frontier_coverage_5": 0.09354995414614678,
"rewards/frontier_entropy_batch_reward": -0.2288777768611908,
"signal/accuracy_reward/centered_abs_mean": 0.084423828125,
"signal/accuracy_reward/group_std_mean": 0.10996274501085282,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.846988070011139,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0422119140625,
"signal/advantage_abs_mean": 0.7726068496704102,
"signal/advantage_pre_scale_abs_mean": 0.05965398624539375,
"signal/advantage_pre_scale_std": 0.09631071537733078,
"signal/advantage_std": 0.9825076580047607,
"signal/brier_reward/centered_abs_mean": 0.10241206586360932,
"signal/brier_reward/group_std_mean": 0.13025801181793212,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20598589181900023,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010241207107901574,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012978291511535645,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01621840223670006,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02623649425804615,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012978291837498547,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_0/centered_abs_mean": 0.15378101766109467,
"signal/frontier_coverage_0/group_std_mean": 0.19407180547714234,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04420729205012321,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021990684792399405,
"signal/frontier_coverage_1/centered_abs_mean": 0.15378101766109467,
"signal/frontier_coverage_1/group_std_mean": 0.19407180547714234,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04420729205012321,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021990684792399405,
"signal/frontier_coverage_10/centered_abs_mean": 0.1467003881931305,
"signal/frontier_coverage_10/group_std_mean": 0.1849285840988159,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0421836256980896,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020978155313059687,
"signal/frontier_coverage_15/centered_abs_mean": 0.0903098776936531,
"signal/frontier_coverage_15/group_std_mean": 0.11397473961114883,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025973235443234443,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00129143123049289,
"signal/frontier_coverage_20/centered_abs_mean": 0.05505901947617531,
"signal/frontier_coverage_20/group_std_mean": 0.06974020153284073,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015893121249973774,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007873439695686102,
"signal/frontier_coverage_25/centered_abs_mean": 0.05497596263885498,
"signal/frontier_coverage_25/group_std_mean": 0.07016591727733612,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015900985337793827,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007861562888137996,
"signal/frontier_coverage_5/centered_abs_mean": 0.15375557243824006,
"signal/frontier_coverage_5/group_std_mean": 0.19404107630252837,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04419991746544838,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021987047512084246,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2851732075214386,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3577597439289093,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5751234650611877,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02851732075214386,
"step": 255
},
{
"calibration/aurc": 0.27243795055495346,
"calibration/batch_distribution_entropy": 0.9705400726977077,
"calibration/buffer_distribution_entropy": 0.9958187429831661,
"calibration/confidence_entropy": 0.48087383541988693,
"calibration/coverage@0%": 0.073046875,
"calibration/coverage@1%": 0.073046875,
"calibration/coverage@10%": 0.2484375,
"calibration/coverage@15%": 0.3125,
"calibration/coverage@20%": 0.36953125,
"calibration/coverage@25%": 0.437109375,
"calibration/coverage@30%": 0.5390625,
"calibration/coverage@5%": 0.185546875,
"calibration/ece": 0.12119329880682919,
"calibration/mean_confidence": 0.4874164737876063,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 442.8,
"completions/max_terminated_length": 442.8,
"completions/mean_length": 207.35478515625,
"completions/mean_terminated_length": 207.35478515625,
"completions/min_length": 112.8,
"completions/min_terminated_length": 112.8,
"epoch": 0.832,
"grad_norm": 0.0085527915507555,
"learning_rate": 1e-06,
"loss": 0.0004,
"num_tokens": 883706139.0,
"reward": 0.9477424025535583,
"reward_std": 0.07137095481157303,
"rewards/accuracy_reward": 0.56142578125,
"rewards/brier_reward": 0.8303583145141602,
"rewards/confidence_uniqueness_reward": 0.95125732421875,
"rewards/format_reward": 1.0,
"rewards/frontier_coverage_0": 0.13446062207221984,
"rewards/frontier_coverage_1": 0.13446062207221984,
"rewards/frontier_coverage_10": 0.12797623723745347,
"rewards/frontier_coverage_15": 0.08604731857776642,
"rewards/frontier_coverage_20": 0.06532630696892738,
"rewards/frontier_coverage_25": 0.07860753238201142,
"rewards/frontier_coverage_5": 0.13444683104753494,
"rewards/frontier_entropy_batch_reward": -0.22019013166427612,
"signal/accuracy_reward/centered_abs_mean": 0.073223876953125,
"signal/accuracy_reward/group_std_mean": 0.1009349599480629,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8066681504249573,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0366119384765625,
"signal/advantage_abs_mean": 0.7617290258407593,
"signal/advantage_pre_scale_abs_mean": 0.054251715540885925,
"signal/advantage_pre_scale_std": 0.09108677059412003,
"signal/advantage_std": 0.9823128700256347,
"signal/brier_reward/centered_abs_mean": 0.08786282539367676,
"signal/brier_reward/group_std_mean": 0.11602227240800858,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19413544237613678,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.008786282502114773,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012720155715942382,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015890151262283325,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028258343040943146,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012720155995339156,
"signal/format_reward/centered_abs_mean": 0.0,
"signal/format_reward/group_std_mean": 0.0,
"signal/format_reward/group_zero_std_frac": 1.0,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0,
"signal/frontier_coverage_0/centered_abs_mean": 0.13118936717510224,
"signal/frontier_coverage_0/group_std_mean": 0.17081800401210784,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0415608175098896,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018760079983621836,
"signal/frontier_coverage_1/centered_abs_mean": 0.13118936717510224,
"signal/frontier_coverage_1/group_std_mean": 0.17081800401210784,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0415608175098896,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018760079983621836,
"signal/frontier_coverage_10/centered_abs_mean": 0.12370103597640991,
"signal/frontier_coverage_10/group_std_mean": 0.16112378239631653,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0391960620880127,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017689247615635395,
"signal/frontier_coverage_15/centered_abs_mean": 0.07675774544477462,
"signal/frontier_coverage_15/group_std_mean": 0.10008619874715804,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024333661049604417,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010976357152685523,
"signal/frontier_coverage_20/centered_abs_mean": 0.051222600787878034,
"signal/frontier_coverage_20/group_std_mean": 0.06572640389204025,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01623872797936201,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007324831676669419,
"signal/frontier_coverage_25/centered_abs_mean": 0.055126645416021344,
"signal/frontier_coverage_25/group_std_mean": 0.07053503394126892,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017435478791594506,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007883110083639622,
"signal/frontier_coverage_5/centered_abs_mean": 0.1311732068657875,
"signal/frontier_coverage_5/group_std_mean": 0.17079680562019348,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04155569672584534,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001875776913948357,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.268852162361145,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3399739146232605,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5957050085067749,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02688521668314934,
"step": 260
},
{
"calibration/aurc": 0.2937613337989851,
"calibration/batch_distribution_entropy": 0.9661228954350038,
"calibration/buffer_distribution_entropy": 0.9959898170050472,
"calibration/confidence_entropy": 0.4870354325118241,
"calibration/coverage@0%": 0.07890930772994129,
"calibration/coverage@1%": 0.0863311827299413,
"calibration/coverage@10%": 0.2183624327299413,
"calibration/coverage@15%": 0.2832084760273973,
"calibration/coverage@20%": 0.44805222602739725,
"calibration/coverage@25%": 0.5054756298923679,
"calibration/coverage@30%": 0.5488365337573387,
"calibration/coverage@5%": 0.12383118272994129,
"calibration/ece": 0.16465495699217805,
"calibration/mean_confidence": 0.5443247483323702,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 418.8,
"completions/max_terminated_length": 418.8,
"completions/mean_length": 201.392578125,
"completions/mean_terminated_length": 201.4120880126953,
"completions/min_length": 92.6,
"completions/min_terminated_length": 114.6,
"epoch": 0.848,
"grad_norm": 0.011671687476336956,
"learning_rate": 1e-06,
"loss": 0.0009,
"num_tokens": 900782767.0,
"reward": 0.939740777015686,
"reward_std": 0.07238752394914627,
"rewards/accuracy_reward": 0.544921875,
"rewards/brier_reward": 0.822428572177887,
"rewards/confidence_uniqueness_reward": 0.952005398273468,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.13857043534517288,
"rewards/frontier_coverage_1": 0.13857043534517288,
"rewards/frontier_coverage_10": 0.1353096455335617,
"rewards/frontier_coverage_15": 0.08651280254125596,
"rewards/frontier_coverage_20": 0.05942459926009178,
"rewards/frontier_coverage_25": 0.066201800853014,
"rewards/frontier_coverage_5": 0.13855212330818176,
"rewards/frontier_entropy_batch_reward": -0.21027669906616211,
"signal/accuracy_reward/centered_abs_mean": 0.07392578125,
"signal/accuracy_reward/group_std_mean": 0.09971548616886139,
"signal/accuracy_reward/group_zero_std_frac": 0.709375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.810302484035492,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.036962890625,
"signal/advantage_abs_mean": 0.7670840382575989,
"signal/advantage_pre_scale_abs_mean": 0.05565920770168305,
"signal/advantage_pre_scale_std": 0.09414431601762771,
"signal/advantage_std": 0.9822983503341675,
"signal/brier_reward/centered_abs_mean": 0.09511243402957917,
"signal/brier_reward/group_std_mean": 0.12226448357105255,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21033987998962403,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009511243738234042,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01212963815778494,
"signal/confidence_uniqueness_reward/group_std_mean": 0.015583262778818607,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026886271312832832,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012129638576880097,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.002119513228535652,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.13417401611804963,
"signal/frontier_coverage_0/group_std_mean": 0.17274006307125092,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.042503001540899275,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001918688416481018,
"signal/frontier_coverage_1/centered_abs_mean": 0.13417401611804963,
"signal/frontier_coverage_1/group_std_mean": 0.17274006307125092,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.042503001540899275,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001918688416481018,
"signal/frontier_coverage_10/centered_abs_mean": 0.13046946972608567,
"signal/frontier_coverage_10/group_std_mean": 0.16800358295440673,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04133014753460884,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001865713344886899,
"signal/frontier_coverage_15/centered_abs_mean": 0.08114371299743653,
"signal/frontier_coverage_15/group_std_mean": 0.10468510389328003,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0257135309278965,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011603550752624868,
"signal/frontier_coverage_20/centered_abs_mean": 0.05167923718690872,
"signal/frontier_coverage_20/group_std_mean": 0.06598224192857742,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016394700668752194,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007390130776911974,
"signal/frontier_coverage_25/centered_abs_mean": 0.054591070115566256,
"signal/frontier_coverage_25/group_std_mean": 0.06988776028156281,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017325525730848314,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007806522771716118,
"signal/frontier_coverage_5/centered_abs_mean": 0.13415417671203614,
"signal/frontier_coverage_5/group_std_mean": 0.1727146774530411,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04249679148197174,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019184047123417258,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2662226051092148,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3375477910041809,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.58951895236969,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026622261106967925,
"step": 265
},
{
"calibration/aurc": 0.2683589162003546,
"calibration/batch_distribution_entropy": 0.9539461017175551,
"calibration/buffer_distribution_entropy": 0.996350172832674,
"calibration/confidence_entropy": 0.4911902207263893,
"calibration/coverage@0%": 0.014453125,
"calibration/coverage@1%": 0.014453125,
"calibration/coverage@10%": 0.190234375,
"calibration/coverage@15%": 0.234765625,
"calibration/coverage@20%": 0.275,
"calibration/coverage@25%": 0.455859375,
"calibration/coverage@30%": 0.701171875,
"calibration/coverage@5%": 0.10859375,
"calibration/ece": 0.1506179715369549,
"calibration/mean_confidence": 0.5922805837100238,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 439.2,
"completions/max_terminated_length": 439.2,
"completions/mean_length": 210.20791015625,
"completions/mean_terminated_length": 210.2282470703125,
"completions/min_length": 86.6,
"completions/min_terminated_length": 109.8,
"epoch": 0.864,
"grad_norm": 0.029889076948165894,
"learning_rate": 1e-06,
"loss": -0.0006,
"num_tokens": 917922112.0,
"reward": 0.9645228505134582,
"reward_std": 0.0741073101758957,
"rewards/accuracy_reward": 0.615234375,
"rewards/brier_reward": 0.8081081867218017,
"rewards/confidence_uniqueness_reward": 0.9501537799835205,
"rewards/format_reward": 0.99990234375,
"rewards/frontier_coverage_0": 0.07734835594892502,
"rewards/frontier_coverage_1": 0.07734835594892502,
"rewards/frontier_coverage_10": 0.07652404010295868,
"rewards/frontier_coverage_15": 0.05951971411705017,
"rewards/frontier_coverage_20": 0.05017582997679711,
"rewards/frontier_coverage_25": 0.06625718101859093,
"rewards/frontier_coverage_5": 0.0773268148303032,
"rewards/frontier_entropy_batch_reward": -0.258000922203064,
"signal/accuracy_reward/centered_abs_mean": 0.07364501953125,
"signal/accuracy_reward/group_std_mean": 0.0979606881737709,
"signal/accuracy_reward/group_zero_std_frac": 0.715625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7580254793167114,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.036822509765625,
"signal/advantage_abs_mean": 0.7813379168510437,
"signal/advantage_pre_scale_abs_mean": 0.05818412229418755,
"signal/advantage_pre_scale_std": 0.09502710700035095,
"signal/advantage_std": 0.9824476718902588,
"signal/brier_reward/centered_abs_mean": 0.0971069797873497,
"signal/brier_reward/group_std_mean": 0.1244538113474846,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20138957500457763,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009710697643458843,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01293389480561018,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016561723686754702,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02688128352165222,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012933894526213408,
"signal/format_reward/centered_abs_mean": 0.000189208984375,
"signal/format_reward/group_std_mean": 0.0005524271633476019,
"signal/format_reward/group_zero_std_frac": 0.996875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0020027007907629014,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05,
"signal/frontier_coverage_0/centered_abs_mean": 0.1333679139614105,
"signal/frontier_coverage_0/group_std_mean": 0.17199209332466125,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03949750140309334,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019071611808612944,
"signal/frontier_coverage_1/centered_abs_mean": 0.1333679139614105,
"signal/frontier_coverage_1/group_std_mean": 0.17199209332466125,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03949750140309334,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019071611808612944,
"signal/frontier_coverage_10/centered_abs_mean": 0.12856392711400985,
"signal/frontier_coverage_10/group_std_mean": 0.16595734059810638,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.038083378970623014,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001838464173488319,
"signal/frontier_coverage_15/centered_abs_mean": 0.07582932710647583,
"signal/frontier_coverage_15/group_std_mean": 0.09822248071432113,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.022478952631354333,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010843593161553144,
"signal/frontier_coverage_20/centered_abs_mean": 0.051568976044654845,
"signal/frontier_coverage_20/group_std_mean": 0.06592448204755783,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015313262306153774,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007374363485723734,
"signal/frontier_coverage_25/centered_abs_mean": 0.05620872303843498,
"signal/frontier_coverage_25/group_std_mean": 0.07158383950591088,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016713694483041764,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008037847350351512,
"signal/frontier_coverage_5/centered_abs_mean": 0.13332813382148742,
"signal/frontier_coverage_5/group_std_mean": 0.17194126546382904,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03948571756482124,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019065923523157835,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29495537281036377,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.36441039443016054,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6126240730285645,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029495537653565405,
"step": 270
},
{
"calibration/aurc": 0.336421051457335,
"calibration/batch_distribution_entropy": 0.9816314252287958,
"calibration/buffer_distribution_entropy": 0.9962709984337563,
"calibration/confidence_entropy": 0.45712846813455643,
"calibration/coverage@0%": 0.0078125,
"calibration/coverage@1%": 0.0078125,
"calibration/coverage@10%": 0.073828125,
"calibration/coverage@15%": 0.141015625,
"calibration/coverage@20%": 0.22578125,
"calibration/coverage@25%": 0.300390625,
"calibration/coverage@30%": 0.41171875,
"calibration/coverage@5%": 0.0140625,
"calibration/ece": 0.12420867423276796,
"calibration/mean_confidence": 0.48923535681011315,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0,
"completions/max_length": 564.8,
"completions/max_terminated_length": 564.8,
"completions/mean_length": 214.70439453125,
"completions/mean_terminated_length": 214.70439453125,
"completions/min_length": 112.6,
"completions/min_terminated_length": 112.6,
"epoch": 0.88,
"grad_norm": 0.009292150847613811,
"learning_rate": 1e-06,
"loss": -0.0012,
"num_tokens": 935267757.0,
"reward": 0.9303600192070007,
"reward_std": 0.07878989428281784,
"rewards/accuracy_reward": 0.5265625,
"rewards/brier_reward": 0.8139204859733582,
"rewards/confidence_uniqueness_reward": 0.9525596857070923,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.14567633271217345,
"rewards/frontier_coverage_1": 0.14567633271217345,
"rewards/frontier_coverage_10": 0.1417877972126007,
"rewards/frontier_coverage_15": 0.08948986679315567,
"rewards/frontier_coverage_20": 0.060767459124326705,
"rewards/frontier_coverage_25": 0.06566002145409584,
"rewards/frontier_coverage_5": 0.1456627994775772,
"rewards/frontier_entropy_batch_reward": -0.20836096704006196,
"signal/accuracy_reward/centered_abs_mean": 0.0876708984375,
"signal/accuracy_reward/group_std_mean": 0.11543703377246857,
"signal/accuracy_reward/group_zero_std_frac": 0.66875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9269353866577148,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.04383544921875,
"signal/advantage_abs_mean": 0.7721322894096374,
"signal/advantage_pre_scale_abs_mean": 0.061681386828422544,
"signal/advantage_pre_scale_std": 0.10155243873596191,
"signal/advantage_std": 0.9824026703834534,
"signal/brier_reward/centered_abs_mean": 0.10448898226022721,
"signal/brier_reward/group_std_mean": 0.1366585373878479,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22174761891365052,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010448898747563362,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012470530718564988,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016169047355651854,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026379484310746194,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012470531044527888,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.003887416422367096,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.15089958012104035,
"signal/frontier_coverage_0/group_std_mean": 0.19479792714118957,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.045803869515657424,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002157864021137357,
"signal/frontier_coverage_1/centered_abs_mean": 0.15089958012104035,
"signal/frontier_coverage_1/group_std_mean": 0.19479792714118957,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.045803869515657424,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002157864021137357,
"signal/frontier_coverage_10/centered_abs_mean": 0.14600327014923095,
"signal/frontier_coverage_10/group_std_mean": 0.18865067064762114,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04432102143764496,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020878467708826064,
"signal/frontier_coverage_15/centered_abs_mean": 0.0892187312245369,
"signal/frontier_coverage_15/group_std_mean": 0.11559360474348068,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027101678028702735,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012758278753608465,
"signal/frontier_coverage_20/centered_abs_mean": 0.05786134228110314,
"signal/frontier_coverage_20/group_std_mean": 0.07404239922761917,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017570355907082558,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000827417231630534,
"signal/frontier_coverage_25/centered_abs_mean": 0.058361977338790894,
"signal/frontier_coverage_25/group_std_mean": 0.07477803826332093,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017693167179822923,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008345762616954744,
"signal/frontier_coverage_5/centered_abs_mean": 0.1508888840675354,
"signal/frontier_coverage_5/group_std_mean": 0.19478428065776826,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.045800501853227614,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021577110514044763,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2678882539272308,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.34042693972587584,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5680663108825683,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026788827031850815,
"step": 275
},
{
"calibration/aurc": 0.30996998547659854,
"calibration/batch_distribution_entropy": 0.9863903465512681,
"calibration/buffer_distribution_entropy": 0.9963396773180555,
"calibration/confidence_entropy": 0.491680642724153,
"calibration/coverage@0%": 0.03242263943248532,
"calibration/coverage@1%": 0.03242263943248532,
"calibration/coverage@10%": 0.1660163894324853,
"calibration/coverage@15%": 0.2105476394324853,
"calibration/coverage@20%": 0.3489664872798434,
"calibration/coverage@25%": 0.4736790606653621,
"calibration/coverage@30%": 0.5557447101272015,
"calibration/coverage@5%": 0.08164138943248532,
"calibration/ece": 0.14309415417650714,
"calibration/mean_confidence": 0.4845343095818896,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 592.6,
"completions/max_terminated_length": 592.6,
"completions/mean_length": 222.80419921875,
"completions/mean_terminated_length": 222.82596740722656,
"completions/min_length": 91.2,
"completions/min_terminated_length": 114.2,
"epoch": 0.896,
"grad_norm": 0.010289231315255165,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 952660120.0,
"reward": 0.9489871025085449,
"reward_std": 0.0732444629073143,
"rewards/accuracy_reward": 0.56728515625,
"rewards/brier_reward": 0.8108451724052429,
"rewards/confidence_uniqueness_reward": 0.9530300736427307,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.11231801509857178,
"rewards/frontier_coverage_1": 0.11231801509857178,
"rewards/frontier_coverage_10": 0.11174041628837586,
"rewards/frontier_coverage_15": 0.07842456176877022,
"rewards/frontier_coverage_20": 0.05463726818561554,
"rewards/frontier_coverage_25": 0.06190679222345352,
"rewards/frontier_coverage_5": 0.11231775730848312,
"rewards/frontier_entropy_batch_reward": -0.20149709582328795,
"signal/accuracy_reward/centered_abs_mean": 0.077850341796875,
"signal/accuracy_reward/group_std_mean": 0.10645336210727692,
"signal/accuracy_reward/group_zero_std_frac": 0.690625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.816600227355957,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0389251708984375,
"signal/advantage_abs_mean": 0.7607104301452636,
"signal/advantage_pre_scale_abs_mean": 0.05540677979588508,
"signal/advantage_pre_scale_std": 0.09241203665733337,
"signal/advantage_std": 0.9824150681495667,
"signal/brier_reward/centered_abs_mean": 0.10257258415222167,
"signal/brier_reward/group_std_mean": 0.13208626657724382,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21615420579910277,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010257259011268616,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01184554658830166,
"signal/confidence_uniqueness_reward/group_std_mean": 0.01544493790715933,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02498168535530567,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011845546774566173,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0039098581299185755,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.14897378981113435,
"signal/frontier_coverage_0/group_std_mean": 0.19240249693393707,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.044983573257923126,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021303252782672645,
"signal/frontier_coverage_1/centered_abs_mean": 0.14897378981113435,
"signal/frontier_coverage_1/group_std_mean": 0.19240249693393707,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.044983573257923126,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021303252782672645,
"signal/frontier_coverage_10/centered_abs_mean": 0.14458298087120056,
"signal/frontier_coverage_10/group_std_mean": 0.18663305044174194,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04364226087927818,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020675365580245853,
"signal/frontier_coverage_15/centered_abs_mean": 0.09132444709539414,
"signal/frontier_coverage_15/group_std_mean": 0.1177283689379692,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027592913806438447,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001305939583107829,
"signal/frontier_coverage_20/centered_abs_mean": 0.055938445031642914,
"signal/frontier_coverage_20/group_std_mean": 0.07106180787086487,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01688665710389614,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007999197579920292,
"signal/frontier_coverage_25/centered_abs_mean": 0.056100095808506015,
"signal/frontier_coverage_25/group_std_mean": 0.07118469923734665,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016873976215720177,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008022313704714179,
"signal/frontier_coverage_5/centered_abs_mean": 0.1489583134651184,
"signal/frontier_coverage_5/group_std_mean": 0.1923828214406967,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0449789248406887,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021301037166267635,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26766058802604675,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3394420027732849,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5642510890960694,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026766058057546616,
"step": 280
},
{
"calibration/aurc": 0.3400950078940635,
"calibration/batch_distribution_entropy": 0.9797264814453449,
"calibration/buffer_distribution_entropy": 0.9966567405586122,
"calibration/confidence_entropy": 0.48842966793674425,
"calibration/coverage@0%": 0.00703660102739726,
"calibration/coverage@1%": 0.00703660102739726,
"calibration/coverage@10%": 0.043757644324853226,
"calibration/coverage@15%": 0.13246774094911937,
"calibration/coverage@20%": 0.2891351210861057,
"calibration/coverage@25%": 0.3969644386007828,
"calibration/coverage@30%": 0.4845240643346379,
"calibration/coverage@5%": 0.00703660102739726,
"calibration/ece": 0.12737401156946584,
"calibration/mean_confidence": 0.5138945779347275,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00029296875,
"completions/max_length": 513.2,
"completions/max_terminated_length": 513.2,
"completions/mean_length": 229.2587890625,
"completions/mean_terminated_length": 229.32558898925782,
"completions/min_length": 95.4,
"completions/min_terminated_length": 117.8,
"epoch": 0.912,
"grad_norm": 0.010692852549254894,
"learning_rate": 1e-06,
"loss": 0.0,
"num_tokens": 970059026.0,
"reward": 0.9425465822219848,
"reward_std": 0.07733960896730423,
"rewards/accuracy_reward": 0.555859375,
"rewards/brier_reward": 0.8171261787414551,
"rewards/confidence_uniqueness_reward": 0.9516117811203003,
"rewards/format_reward": 0.999609375,
"rewards/frontier_coverage_0": 0.11456998586654663,
"rewards/frontier_coverage_1": 0.11456998586654663,
"rewards/frontier_coverage_10": 0.11256050020456314,
"rewards/frontier_coverage_15": 0.07863751500844955,
"rewards/frontier_coverage_20": 0.0569301575422287,
"rewards/frontier_coverage_25": 0.06953249722719193,
"rewards/frontier_coverage_5": 0.11456334218382835,
"rewards/frontier_entropy_batch_reward": -0.21519100069999694,
"signal/accuracy_reward/centered_abs_mean": 0.0781494140625,
"signal/accuracy_reward/group_std_mean": 0.10967252552509307,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7355853736400604,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.03907470703125,
"signal/advantage_abs_mean": 0.7514355540275574,
"signal/advantage_pre_scale_abs_mean": 0.05810001492500305,
"signal/advantage_pre_scale_std": 0.09480322301387786,
"signal/advantage_std": 0.9825671911239624,
"signal/brier_reward/centered_abs_mean": 0.10597307980060577,
"signal/brier_reward/group_std_mean": 0.1378685712814331,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2066969782114029,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010597308166325092,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.012444668635725974,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016399627551436424,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024437383562326432,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012444668915122747,
"signal/format_reward/centered_abs_mean": 0.00074462890625,
"signal/format_reward/group_std_mean": 0.0018734002020210027,
"signal/format_reward/group_zero_std_frac": 0.990625,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.008089378848671913,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000372314453125,
"signal/frontier_coverage_0/centered_abs_mean": 0.14773554503917694,
"signal/frontier_coverage_0/group_std_mean": 0.1906901478767395,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041051150858402254,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002112618274986744,
"signal/frontier_coverage_1/centered_abs_mean": 0.14773554503917694,
"signal/frontier_coverage_1/group_std_mean": 0.1906901478767395,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041051150858402254,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002112618274986744,
"signal/frontier_coverage_10/centered_abs_mean": 0.14383466243743898,
"signal/frontier_coverage_10/group_std_mean": 0.18568643629550935,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03997599333524704,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020568356849253178,
"signal/frontier_coverage_15/centered_abs_mean": 0.09209516048431396,
"signal/frontier_coverage_15/group_std_mean": 0.11944440305233002,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025683220103383066,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013169607846066356,
"signal/frontier_coverage_20/centered_abs_mean": 0.0584987074136734,
"signal/frontier_coverage_20/group_std_mean": 0.07466120570898056,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016389196924865245,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008365315268747508,
"signal/frontier_coverage_25/centered_abs_mean": 0.06181689128279686,
"signal/frontier_coverage_25/group_std_mean": 0.07882332503795624,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01727975495159626,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008839815389364957,
"signal/frontier_coverage_5/centered_abs_mean": 0.147720268368721,
"signal/frontier_coverage_5/group_std_mean": 0.1906701147556305,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04104689806699753,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021123998798429967,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2638342171907425,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.33859837651252744,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5125030159950257,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026383423060178757,
"step": 285
},
{
"calibration/aurc": 0.38766553715032764,
"calibration/batch_distribution_entropy": 0.981518331491662,
"calibration/buffer_distribution_entropy": 0.9970985889687846,
"calibration/confidence_entropy": 0.503844832131577,
"calibration/coverage@0%": 0.00390625,
"calibration/coverage@1%": 0.00390625,
"calibration/coverage@10%": 0.0125,
"calibration/coverage@15%": 0.039453125,
"calibration/coverage@20%": 0.10234375,
"calibration/coverage@25%": 0.214453125,
"calibration/coverage@30%": 0.271875,
"calibration/coverage@5%": 0.00390625,
"calibration/ece": 0.10925647180136108,
"calibration/mean_confidence": 0.4602850216584913,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 9.765625e-05,
"completions/max_length": 554.6,
"completions/max_terminated_length": 554.6,
"completions/mean_length": 232.66123046875,
"completions/mean_terminated_length": 232.6836395263672,
"completions/min_length": 91.2,
"completions/min_terminated_length": 115.4,
"epoch": 0.928,
"grad_norm": 0.011063729412853718,
"learning_rate": 1e-06,
"loss": -0.0008,
"num_tokens": 987468293.0,
"reward": 0.9349685192108155,
"reward_std": 0.07279682755470276,
"rewards/accuracy_reward": 0.54267578125,
"rewards/brier_reward": 0.809288215637207,
"rewards/confidence_uniqueness_reward": 0.9514593601226806,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.12597917169332504,
"rewards/frontier_coverage_1": 0.12597917169332504,
"rewards/frontier_coverage_10": 0.12356604933738709,
"rewards/frontier_coverage_15": 0.08888857066631317,
"rewards/frontier_coverage_20": 0.06169629544019699,
"rewards/frontier_coverage_25": 0.06558309346437455,
"rewards/frontier_coverage_5": 0.1259745851159096,
"rewards/frontier_entropy_batch_reward": -0.2260911613702774,
"signal/accuracy_reward/centered_abs_mean": 0.074444580078125,
"signal/accuracy_reward/group_std_mean": 0.09707383662462235,
"signal/accuracy_reward/group_zero_std_frac": 0.728125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7905593991279602,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0372222900390625,
"signal/advantage_abs_mean": 0.7814563870429992,
"signal/advantage_pre_scale_abs_mean": 0.05725823864340782,
"signal/advantage_pre_scale_std": 0.09308888167142867,
"signal/advantage_std": 0.98237384557724,
"signal/brier_reward/centered_abs_mean": 0.10359592139720916,
"signal/brier_reward/group_std_mean": 0.13340485095977783,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22268273532390595,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.01035959217697382,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013044451922178268,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016821864247322082,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028180352598428726,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013044452294707297,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.00401168242096901,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.14871254861354827,
"signal/frontier_coverage_0/group_std_mean": 0.18960267603397368,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04581791833043099,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002126589324325323,
"signal/frontier_coverage_1/centered_abs_mean": 0.14871254861354827,
"signal/frontier_coverage_1/group_std_mean": 0.18960267603397368,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04581791833043099,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002126589324325323,
"signal/frontier_coverage_10/centered_abs_mean": 0.14471837282180786,
"signal/frontier_coverage_10/group_std_mean": 0.1845400959253311,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04460237473249436,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002069472731091082,
"signal/frontier_coverage_15/centered_abs_mean": 0.09628659933805465,
"signal/frontier_coverage_15/group_std_mean": 0.12331466376781464,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029709017276763915,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013768984004855155,
"signal/frontier_coverage_20/centered_abs_mean": 0.058573897927999496,
"signal/frontier_coverage_20/group_std_mean": 0.07467443645000457,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018021496012806892,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008376067155040801,
"signal/frontier_coverage_25/centered_abs_mean": 0.0570153571665287,
"signal/frontier_coverage_25/group_std_mean": 0.07228669673204421,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017514837346971034,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008153195609338582,
"signal/frontier_coverage_5/centered_abs_mean": 0.14869737327098848,
"signal/frontier_coverage_5/group_std_mean": 0.18958347141742707,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04581324979662895,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021263723261654376,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27987065613269807,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3498177230358124,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6000606775283813,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02798706628382206,
"step": 290
},
{
"calibration/aurc": 0.23378421040895736,
"calibration/batch_distribution_entropy": 0.9588926911239236,
"calibration/buffer_distribution_entropy": 0.9973781770111927,
"calibration/confidence_entropy": 0.4632707925429629,
"calibration/coverage@0%": 0.026171875,
"calibration/coverage@1%": 0.051171875,
"calibration/coverage@10%": 0.203515625,
"calibration/coverage@15%": 0.390625,
"calibration/coverage@20%": 0.52109375,
"calibration/coverage@25%": 0.619140625,
"calibration/coverage@30%": 0.683984375,
"calibration/coverage@5%": 0.100390625,
"calibration/ece": 0.1241622176784349,
"calibration/mean_confidence": 0.44058108322948064,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0001953125,
"completions/max_length": 568.0,
"completions/max_terminated_length": 568.0,
"completions/mean_length": 233.37685546875,
"completions/mean_terminated_length": 233.42279663085938,
"completions/min_length": 93.4,
"completions/min_terminated_length": 118.2,
"epoch": 0.944,
"grad_norm": 0.01295262761414051,
"learning_rate": 1e-06,
"loss": 0.0006,
"num_tokens": 1004833496.0,
"reward": 0.9346524477005005,
"reward_std": 0.07876813262701035,
"rewards/accuracy_reward": 0.54560546875,
"rewards/brier_reward": 0.8073648333549499,
"rewards/confidence_uniqueness_reward": 0.9499351501464843,
"rewards/format_reward": 0.9998046875,
"rewards/frontier_coverage_0": 0.13496437668800354,
"rewards/frontier_coverage_1": 0.13496437668800354,
"rewards/frontier_coverage_10": 0.13205459117889404,
"rewards/frontier_coverage_15": 0.09704540967941284,
"rewards/frontier_coverage_20": 0.0626195065677166,
"rewards/frontier_coverage_25": 0.060534913837909696,
"rewards/frontier_coverage_5": 0.13495712727308273,
"rewards/frontier_entropy_batch_reward": -0.24609753489494324,
"signal/accuracy_reward/centered_abs_mean": 0.095135498046875,
"signal/accuracy_reward/group_std_mean": 0.12562316060066223,
"signal/accuracy_reward/group_zero_std_frac": 0.6375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9591840744018555,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0475677490234375,
"signal/advantage_abs_mean": 0.7657712578773499,
"signal/advantage_pre_scale_abs_mean": 0.06128266230225563,
"signal/advantage_pre_scale_std": 0.09885531663894653,
"signal/advantage_std": 0.9824792623519898,
"signal/brier_reward/centered_abs_mean": 0.10642676800489426,
"signal/brier_reward/group_std_mean": 0.13670923560857773,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21600624322891235,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010642676800489425,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.013227501884102821,
"signal/confidence_uniqueness_reward/group_std_mean": 0.017046448588371278,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027110712230205537,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013227502349764109,
"signal/format_reward/centered_abs_mean": 0.00037841796875,
"signal/format_reward/group_std_mean": 0.0011048543266952038,
"signal/format_reward/group_zero_std_frac": 0.99375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.0038907095789909364,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000189208984375,
"signal/frontier_coverage_0/centered_abs_mean": 0.17288099527359008,
"signal/frontier_coverage_0/group_std_mean": 0.22045514285564421,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.05001463890075684,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002472198219038546,
"signal/frontier_coverage_1/centered_abs_mean": 0.17288099527359008,
"signal/frontier_coverage_1/group_std_mean": 0.22045514285564421,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.05001463890075684,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002472198219038546,
"signal/frontier_coverage_10/centered_abs_mean": 0.16872143149375915,
"signal/frontier_coverage_10/group_std_mean": 0.21528524458408355,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.048827193677425385,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024127164389938114,
"signal/frontier_coverage_15/centered_abs_mean": 0.11024031639099122,
"signal/frontier_coverage_15/group_std_mean": 0.14126538336277009,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03192974366247654,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001576436497271061,
"signal/frontier_coverage_20/centered_abs_mean": 0.06356698796153068,
"signal/frontier_coverage_20/group_std_mean": 0.08100138902664185,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01849036365747452,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009090078994631768,
"signal/frontier_coverage_25/centered_abs_mean": 0.054581372439861296,
"signal/frontier_coverage_25/group_std_mean": 0.06919515281915664,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01598366592079401,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007805136265233159,
"signal/frontier_coverage_5/centered_abs_mean": 0.17286439538002013,
"signal/frontier_coverage_5/group_std_mean": 0.22043438553810119,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.05000990778207779,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024719608947634695,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.278346860408783,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.35178478956222536,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5688707113265992,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027834687754511832,
"step": 295
},
{
"calibration/aurc": 0.31441186503547164,
"calibration/batch_distribution_entropy": 0.9633186459663221,
"calibration/buffer_distribution_entropy": 0.9972034755573844,
"calibration/confidence_entropy": 0.449966523396641,
"calibration/coverage@0%": 0.01328125,
"calibration/coverage@1%": 0.01328125,
"calibration/coverage@10%": 0.119921875,
"calibration/coverage@15%": 0.2625,
"calibration/coverage@20%": 0.3,
"calibration/coverage@25%": 0.37109375,
"calibration/coverage@30%": 0.51015625,
"calibration/coverage@5%": 0.0390625,
"calibration/ece": 0.1385596621596186,
"calibration/mean_confidence": 0.5507708088766715,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.000390625,
"completions/max_length": 670.2,
"completions/max_terminated_length": 670.2,
"completions/mean_length": 231.98203125,
"completions/mean_terminated_length": 232.07284545898438,
"completions/min_length": 68.2,
"completions/min_terminated_length": 114.8,
"epoch": 0.96,
"grad_norm": 0.008244195021688938,
"learning_rate": 1e-06,
"loss": -0.0072,
"num_tokens": 1022149312.0,
"reward": 0.9294110298156738,
"reward_std": 0.0755992129445076,
"rewards/accuracy_reward": 0.532421875,
"rewards/brier_reward": 0.8261340737342835,
"rewards/confidence_uniqueness_reward": 0.9491169810295105,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_coverage_0": 0.15588035583496093,
"rewards/frontier_coverage_1": 0.15588035583496093,
"rewards/frontier_coverage_10": 0.15350482165813445,
"rewards/frontier_coverage_15": 0.10767751783132554,
"rewards/frontier_coverage_20": 0.07077482268214226,
"rewards/frontier_coverage_25": 0.07669939547777176,
"rewards/frontier_coverage_5": 0.15587256848812103,
"rewards/frontier_entropy_batch_reward": -0.26562986969947816,
"signal/accuracy_reward/centered_abs_mean": 0.07545166015625,
"signal/accuracy_reward/group_std_mean": 0.10325214564800263,
"signal/accuracy_reward/group_zero_std_frac": 0.69375,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7740512013435363,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.037725830078125,
"signal/advantage_abs_mean": 0.7682592153549195,
"signal/advantage_pre_scale_abs_mean": 0.05740421935915947,
"signal/advantage_pre_scale_std": 0.09488718807697297,
"signal/advantage_std": 0.982464599609375,
"signal/brier_reward/centered_abs_mean": 0.09947880506515502,
"signal/brier_reward/group_std_mean": 0.1302649974822998,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20439462959766388,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009947880543768406,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014108524098992348,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019470517709851264,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029055507853627205,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014108523493632675,
"signal/format_reward/centered_abs_mean": 0.00113525390625,
"signal/format_reward/group_std_mean": 0.0033145629800856113,
"signal/format_reward/group_zero_std_frac": 0.98125,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.011646222323179245,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000567626953125,
"signal/frontier_coverage_0/centered_abs_mean": 0.142266646027565,
"signal/frontier_coverage_0/group_std_mean": 0.18246809244155884,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04174907729029655,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020344130927696822,
"signal/frontier_coverage_1/centered_abs_mean": 0.142266646027565,
"signal/frontier_coverage_1/group_std_mean": 0.18246809244155884,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04174907729029655,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020344130927696822,
"signal/frontier_coverage_10/centered_abs_mean": 0.14002286195755004,
"signal/frontier_coverage_10/group_std_mean": 0.17962463200092316,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.041089994460344316,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002002326911315322,
"signal/frontier_coverage_15/centered_abs_mean": 0.09163234382867813,
"signal/frontier_coverage_15/group_std_mean": 0.11807395815849304,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026894450187683105,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00131034255027771,
"signal/frontier_coverage_20/centered_abs_mean": 0.05888952389359474,
"signal/frontier_coverage_20/group_std_mean": 0.07460112124681473,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01728636063635349,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008421201724559069,
"signal/frontier_coverage_25/centered_abs_mean": 0.060860900580883025,
"signal/frontier_coverage_25/group_std_mean": 0.07711833715438843,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017893003672361373,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008703108527697623,
"signal/frontier_coverage_5/centered_abs_mean": 0.14225718677043914,
"signal/frontier_coverage_5/group_std_mean": 0.1824559450149536,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04174629151821137,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020342777483165265,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29106062054634096,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3644901514053345,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5973328590393067,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02910606376826763,
"step": 300
},
{
"epoch": 0.96,
"eval_calibration/aurc": 0.4415723707304741,
"eval_calibration/batch_distribution_entropy": 0.9349213247980521,
"eval_calibration/buffer_distribution_entropy": 0.9966973513521955,
"eval_calibration/confidence_entropy": 0.47848555053430986,
"eval_calibration/coverage@0%": 0.0859375,
"eval_calibration/coverage@1%": 0.0859375,
"eval_calibration/coverage@10%": 0.0859375,
"eval_calibration/coverage@15%": 0.109375,
"eval_calibration/coverage@20%": 0.1484375,
"eval_calibration/coverage@25%": 0.1796875,
"eval_calibration/coverage@30%": 0.2109375,
"eval_calibration/coverage@5%": 0.0859375,
"eval_calibration/ece": 0.1948771309265625,
"eval_calibration/mean_confidence": 0.45367350186093747,
"eval_completions/clipped_ratio": 0.0,
"eval_completions/max_length": 457.0,
"eval_completions/max_terminated_length": 457.0,
"eval_completions/mean_length": 241.4718475341797,
"eval_completions/mean_terminated_length": 241.4718475341797,
"eval_completions/min_length": 144.25,
"eval_completions/min_terminated_length": 144.25,
"eval_loss": 0.0,
"eval_num_tokens": 1022149312.0,
"eval_reward": 0.8029894828796387,
"eval_reward_std": 0.2307339571416378,
"eval_rewards/accuracy_reward": 0.435546875,
"eval_rewards/brier_reward": 0.8061055541038513,
"eval_rewards/confidence_uniqueness_reward": 0.8974609375,
"eval_rewards/format_reward": 1.0,
"eval_rewards/frontier_coverage_0": 0.1957620456814766,
"eval_rewards/frontier_coverage_1": 0.1957620456814766,
"eval_rewards/frontier_coverage_10": 0.19467196241021156,
"eval_rewards/frontier_coverage_15": 0.13251663371920586,
"eval_rewards/frontier_coverage_20": 0.06984788924455643,
"eval_rewards/frontier_coverage_25": 0.05480411183089018,
"eval_rewards/frontier_coverage_5": 0.1957547478377819,
"eval_rewards/frontier_entropy_batch_reward": -1.0,
"eval_runtime": 23.0845,
"eval_samples_per_second": 21.66,
"eval_signal/accuracy_reward/centered_abs_mean": 0.4744873046875,
"eval_signal/accuracy_reward/group_std_mean": 0.4946645200252533,
"eval_signal/accuracy_reward/group_zero_std_frac": 0.0,
"eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0288754552602768,
"eval_signal/accuracy_reward/weight": 0.5,
"eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23724365234375,
"eval_signal/advantage_abs_mean": 0.939881756901741,
"eval_signal/advantage_pre_scale_abs_mean": 0.21714717894792557,
"eval_signal/advantage_pre_scale_std": 0.2281285598874092,
"eval_signal/advantage_std": 0.9876896291971207,
"eval_signal/brier_reward/centered_abs_mean": 0.18803402036428452,
"eval_signal/brier_reward/group_std_mean": 0.23641518875956535,
"eval_signal/brier_reward/group_zero_std_frac": 0.0,
"eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08167162910103798,
"eval_signal/brier_reward/weight": 0.10000000149011612,
"eval_signal/brier_reward/weighted_centered_abs_mean": 0.018803401850163937,
"eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0414581298828125,
"eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04806934855878353,
"eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01800214545801282,
"eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004145812941715121,
"eval_signal/format_reward/centered_abs_mean": 0.0,
"eval_signal/format_reward/group_std_mean": 0.0,
"eval_signal/format_reward/group_zero_std_frac": 1.0,
"eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/format_reward/weight": 0.5,
"eval_signal/format_reward/weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_coverage_0/centered_abs_mean": 0.36271509528160095,
"eval_signal/frontier_coverage_0/group_std_mean": 0.4364357739686966,
"eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02251249784603715,
"eval_signal/frontier_coverage_0/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005186826107092202,
"eval_signal/frontier_coverage_1/centered_abs_mean": 0.36271509528160095,
"eval_signal/frontier_coverage_1/group_std_mean": 0.4364357739686966,
"eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02251249784603715,
"eval_signal/frontier_coverage_1/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005186826107092202,
"eval_signal/frontier_coverage_10/centered_abs_mean": 0.36009519547224045,
"eval_signal/frontier_coverage_10/group_std_mean": 0.4333682507276535,
"eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.022349967621266842,
"eval_signal/frontier_coverage_10/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005149361444637179,
"eval_signal/frontier_coverage_15/centered_abs_mean": 0.2319328859448433,
"eval_signal/frontier_coverage_15/group_std_mean": 0.28401825577020645,
"eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01439695293083787,
"eval_signal/frontier_coverage_15/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003316640213597566,
"eval_signal/frontier_coverage_20/centered_abs_mean": 0.10108834877610207,
"eval_signal/frontier_coverage_20/group_std_mean": 0.12704241834580898,
"eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006276872823946178,
"eval_signal/frontier_coverage_20/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014455633936449885,
"eval_signal/frontier_coverage_25/centered_abs_mean": 0.09253444895148277,
"eval_signal/frontier_coverage_25/group_std_mean": 0.1211695522069931,
"eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00573564157821238,
"eval_signal/frontier_coverage_25/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013232426135800779,
"eval_signal/frontier_coverage_5/centered_abs_mean": 0.3626917079091072,
"eval_signal/frontier_coverage_5/group_std_mean": 0.43640825897455215,
"eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.022511047311127186,
"eval_signal/frontier_coverage_5/weight": 0.014299999922513962,
"eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005186491413041949,
"eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0,
"eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0,
"eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0,
"eval_steps_per_second": 0.173,
"step": 300
},
{
"calibration/aurc": 0.2298664176915633,
"calibration/batch_distribution_entropy": 0.9806768056698789,
"calibration/buffer_distribution_entropy": 0.9966620218604406,
"calibration/confidence_entropy": 0.5069850261975526,
"calibration/coverage@0%": 0.07621101424361493,
"calibration/coverage@1%": 0.10316413924361494,
"calibration/coverage@10%": 0.327171844302554,
"calibration/coverage@15%": 0.4347433693516699,
"calibration/coverage@20%": 0.5016047089882122,
"calibration/coverage@25%": 0.571963255157171,
"calibration/coverage@30%": 0.6295002455795677,
"calibration/coverage@5%": 0.20215572814341848,
"calibration/ece": 0.1638740621100828,
"calibration/mean_confidence": 0.4818879265565929,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.0005859375,
"completions/max_length": 609.2,
"completions/max_terminated_length": 609.2,
"completions/mean_length": 246.87958984375,
"completions/mean_terminated_length": 247.02230834960938,
"completions/min_length": 81.2,
"completions/min_terminated_length": 130.4,
"epoch": 0.976,
"grad_norm": 0.013785382732748985,
"learning_rate": 1e-06,
"loss": -0.0002,
"num_tokens": 1039538479.0,
"reward": 0.9428975343704223,
"reward_std": 0.07416855841875077,
"rewards/accuracy_reward": 0.56025390625,
"rewards/brier_reward": 0.8010736227035522,
"rewards/confidence_uniqueness_reward": 0.9515250086784363,
"rewards/format_reward": 0.9994140625,
"rewards/frontier_coverage_0": 0.10944837052375078,
"rewards/frontier_coverage_1": 0.10944837052375078,
"rewards/frontier_coverage_10": 0.10874446658417583,
"rewards/frontier_coverage_15": 0.08354733660817146,
"rewards/frontier_coverage_20": 0.05517433062195778,
"rewards/frontier_coverage_25": 0.058141480386257174,
"rewards/frontier_coverage_5": 0.10943909073248506,
"rewards/frontier_entropy_batch_reward": -0.2126171350479126,
"signal/accuracy_reward/centered_abs_mean": 0.076971435546875,
"signal/accuracy_reward/group_std_mean": 0.11019863039255143,
"signal/accuracy_reward/group_zero_std_frac": 0.653125,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7614342093467712,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0384857177734375,
"signal/advantage_abs_mean": 0.7454835653305054,
"signal/advantage_pre_scale_abs_mean": 0.054844215512275696,
"signal/advantage_pre_scale_std": 0.09041195660829544,
"signal/advantage_std": 0.9824780821800232,
"signal/brier_reward/centered_abs_mean": 0.09713428020477295,
"signal/brier_reward/group_std_mean": 0.12663674652576445,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1961934447288513,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009713428001850844,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.01313753817230463,
"signal/confidence_uniqueness_reward/group_std_mean": 0.0176511786878109,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026738233864307404,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013137538451701402,
"signal/format_reward/centered_abs_mean": 0.00111083984375,
"signal/format_reward/group_std_mean": 0.0026419460773468018,
"signal/format_reward/group_zero_std_frac": 0.9875,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.010505561530590058,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.000555419921875,
"signal/frontier_coverage_0/centered_abs_mean": 0.1482255771756172,
"signal/frontier_coverage_0/group_std_mean": 0.1937095880508423,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0427293211221695,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021196257323026655,
"signal/frontier_coverage_1/centered_abs_mean": 0.1482255771756172,
"signal/frontier_coverage_1/group_std_mean": 0.1937095880508423,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0427293211221695,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021196257323026655,
"signal/frontier_coverage_10/centered_abs_mean": 0.14717507511377334,
"signal/frontier_coverage_10/group_std_mean": 0.19235173761844634,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04242658242583275,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021046036155894397,
"signal/frontier_coverage_15/centered_abs_mean": 0.09856034517288208,
"signal/frontier_coverage_15/group_std_mean": 0.12866656184196473,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.028408873453736307,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014094128971919417,
"signal/frontier_coverage_20/centered_abs_mean": 0.05635328218340874,
"signal/frontier_coverage_20/group_std_mean": 0.07260994017124175,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016331818141043185,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008058519102633,
"signal/frontier_coverage_25/centered_abs_mean": 0.05241282656788826,
"signal/frontier_coverage_25/group_std_mean": 0.06700127571821213,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015302561409771442,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007495034369640052,
"signal/frontier_coverage_5/centered_abs_mean": 0.148216313123703,
"signal/frontier_coverage_5/group_std_mean": 0.1936979979276657,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04272666648030281,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021194932982325555,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26816104650497435,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3424443662166595,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5510828495025635,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026816104725003242,
"step": 305
},
{
"calibration/aurc": 0.33477025169694324,
"calibration/batch_distribution_entropy": 0.9750517619387263,
"calibration/buffer_distribution_entropy": 0.9969642801765097,
"calibration/confidence_entropy": 0.49620530918619676,
"calibration/coverage@0%": 0.02970992284832691,
"calibration/coverage@1%": 0.02970992284832691,
"calibration/coverage@10%": 0.11534012098922711,
"calibration/coverage@15%": 0.16945480221028464,
"calibration/coverage@20%": 0.2176473629047102,
"calibration/coverage@25%": 0.3736974780121257,
"calibration/coverage@30%": 0.4831432959814198,
"calibration/coverage@5%": 0.04533492284832691,
"calibration/ece": 0.13145712083874353,
"calibration/mean_confidence": 0.4428615850637298,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00107421875,
"completions/max_length": 747.2,
"completions/max_terminated_length": 747.2,
"completions/mean_length": 256.34677734375,
"completions/mean_terminated_length": 256.6212921142578,
"completions/min_length": 47.0,
"completions/min_terminated_length": 127.6,
"epoch": 0.992,
"grad_norm": 0.0102123087272048,
"learning_rate": 1e-06,
"loss": 0.0046,
"num_tokens": 1057291950.0,
"reward": 0.9348322153091431,
"reward_std": 0.07765627354383468,
"rewards/accuracy_reward": 0.54091796875,
"rewards/brier_reward": 0.8109241127967834,
"rewards/confidence_uniqueness_reward": 0.9507102489471435,
"rewards/format_reward": 0.998828125,
"rewards/frontier_coverage_0": 0.133108651638031,
"rewards/frontier_coverage_1": 0.133108651638031,
"rewards/frontier_coverage_10": 0.13217684626579285,
"rewards/frontier_coverage_15": 0.09742676615715026,
"rewards/frontier_coverage_20": 0.0603479154407978,
"rewards/frontier_coverage_25": 0.061624595522880556,
"rewards/frontier_coverage_5": 0.1331046998500824,
"rewards/frontier_entropy_batch_reward": -0.2194212406873703,
"signal/accuracy_reward/centered_abs_mean": 0.084979248046875,
"signal/accuracy_reward/group_std_mean": 0.11238697618246078,
"signal/accuracy_reward/group_zero_std_frac": 0.671875,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8460070013999939,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0424896240234375,
"signal/advantage_abs_mean": 0.763712465763092,
"signal/advantage_pre_scale_abs_mean": 0.05977008268237114,
"signal/advantage_pre_scale_std": 0.09724251925945282,
"signal/advantage_std": 0.9824697017669678,
"signal/brier_reward/centered_abs_mean": 0.09974109381437302,
"signal/brier_reward/group_std_mean": 0.12734393328428267,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20343088805675508,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.009974109753966332,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.014469091221690178,
"signal/confidence_uniqueness_reward/group_std_mean": 0.019127808138728143,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029598025232553483,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014469092013314367,
"signal/format_reward/centered_abs_mean": 0.002001953125,
"signal/format_reward/group_std_mean": 0.0037383693270385265,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.02066621519625187,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0010009765625,
"signal/frontier_coverage_0/centered_abs_mean": 0.16110625863075256,
"signal/frontier_coverage_0/group_std_mean": 0.2007138967514038,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04667669981718063,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023038194980472327,
"signal/frontier_coverage_1/centered_abs_mean": 0.16110625863075256,
"signal/frontier_coverage_1/group_std_mean": 0.2007138967514038,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04667669981718063,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023038194980472327,
"signal/frontier_coverage_10/centered_abs_mean": 0.15938453674316405,
"signal/frontier_coverage_10/group_std_mean": 0.1985825330018997,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04617907330393791,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002279198868200183,
"signal/frontier_coverage_15/centered_abs_mean": 0.10327828973531723,
"signal/frontier_coverage_15/group_std_mean": 0.12951961457729338,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02991574816405773,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014768795343115925,
"signal/frontier_coverage_20/centered_abs_mean": 0.05653135553002357,
"signal/frontier_coverage_20/group_std_mean": 0.0712839536368847,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016467047110199928,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008083983790129423,
"signal/frontier_coverage_25/centered_abs_mean": 0.05165333226323128,
"signal/frontier_coverage_25/group_std_mean": 0.06579789370298386,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015181095898151397,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007386426557786762,
"signal/frontier_coverage_5/centered_abs_mean": 0.16109590530395507,
"signal/frontier_coverage_5/group_std_mean": 0.20070102512836457,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.046673715114593506,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023036715108901264,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2753097414970398,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.3474380552768707,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5634862422943115,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02753097340464592,
"step": 310
},
{
"calibration/aurc": 0.2724536165131359,
"calibration/batch_distribution_entropy": 0.9669139741272306,
"calibration/buffer_distribution_entropy": 0.9970807525569785,
"calibration/confidence_entropy": 0.45874800822237394,
"calibration/coverage@0%": 0.0029296875,
"calibration/coverage@1%": 0.0029296875,
"calibration/coverage@10%": 0.0029296875,
"calibration/coverage@15%": 0.0087890625,
"calibration/coverage@20%": 0.23828125,
"calibration/coverage@25%": 0.5478515625,
"calibration/coverage@30%": 0.736328125,
"calibration/coverage@5%": 0.0029296875,
"calibration/ece": 0.12426718671766204,
"calibration/mean_confidence": 0.5752325127324063,
"clip_ratio/high_max": 0.0,
"clip_ratio/high_mean": 0.0,
"clip_ratio/low_mean": 0.0,
"clip_ratio/low_min": 0.0,
"clip_ratio/region_mean": 0.0,
"completions/clipped_ratio": 0.00048828125,
"completions/max_length": 593.5,
"completions/max_terminated_length": 593.5,
"completions/mean_length": 256.4180374145508,
"completions/mean_terminated_length": 256.5437545776367,
"completions/min_length": 58.0,
"completions/min_terminated_length": 128.0,
"epoch": 0.9984,
"num_tokens": 1064347934.0,
"reward": 0.9560622274875641,
"reward_std": 0.0776829868555069,
"rewards/accuracy_reward": 0.59228515625,
"rewards/brier_reward": 0.7798943519592285,
"rewards/confidence_uniqueness_reward": 0.9535358250141144,
"rewards/format_reward": 0.99951171875,
"rewards/frontier_coverage_0": 0.0610650721937418,
"rewards/frontier_coverage_1": 0.0610650721937418,
"rewards/frontier_coverage_10": 0.061735767871141434,
"rewards/frontier_coverage_15": 0.04953071102499962,
"rewards/frontier_coverage_20": 0.04053525626659393,
"rewards/frontier_coverage_25": 0.05742606520652771,
"rewards/frontier_coverage_5": 0.06107356771826744,
"rewards/frontier_entropy_batch_reward": -0.18790987133979797,
"signal/accuracy_reward/centered_abs_mean": 0.079864501953125,
"signal/accuracy_reward/group_std_mean": 0.11184961348772049,
"signal/accuracy_reward/group_zero_std_frac": 0.65625,
"signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8313649296760559,
"signal/accuracy_reward/weight": 0.5,
"signal/accuracy_reward/weighted_centered_abs_mean": 0.0399322509765625,
"signal/advantage_abs_mean": 0.7595762014389038,
"signal/advantage_pre_scale_abs_mean": 0.05862266570329666,
"signal/advantage_pre_scale_std": 0.09895920753479004,
"signal/advantage_std": 0.9824418723583221,
"signal/brier_reward/centered_abs_mean": 0.10770522058010101,
"signal/brier_reward/group_std_mean": 0.13640563189983368,
"signal/brier_reward/group_zero_std_frac": 0.0,
"signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2242312952876091,
"signal/brier_reward/weight": 0.10000000149011612,
"signal/brier_reward/weighted_centered_abs_mean": 0.010770522058010101,
"signal/confidence_uniqueness_reward/centered_abs_mean": 0.011800288688391447,
"signal/confidence_uniqueness_reward/group_std_mean": 0.016339605674147606,
"signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0,
"signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024581880308687687,
"signal/confidence_uniqueness_reward/weight": 0.10000000149011612,
"signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001180028892122209,
"signal/format_reward/centered_abs_mean": 0.000946044921875,
"signal/format_reward/group_std_mean": 0.0027621358167380095,
"signal/format_reward/group_zero_std_frac": 0.984375,
"signal/format_reward/scaled_weighted_centered_abs_mean": 0.009899882599711418,
"signal/format_reward/weight": 0.5,
"signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375,
"signal/frontier_coverage_0/centered_abs_mean": 0.14607372134923935,
"signal/frontier_coverage_0/group_std_mean": 0.18750649690628052,
"signal/frontier_coverage_0/group_zero_std_frac": 0.0,
"signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.043485309928655624,
"signal/frontier_coverage_0/weight": 0.014299999922513962,
"signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020888540893793106,
"signal/frontier_coverage_1/centered_abs_mean": 0.14607372134923935,
"signal/frontier_coverage_1/group_std_mean": 0.18750649690628052,
"signal/frontier_coverage_1/group_zero_std_frac": 0.0,
"signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.043485309928655624,
"signal/frontier_coverage_1/weight": 0.014299999922513962,
"signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020888540893793106,
"signal/frontier_coverage_10/centered_abs_mean": 0.14458149671554565,
"signal/frontier_coverage_10/group_std_mean": 0.18554429709911346,
"signal/frontier_coverage_10/group_zero_std_frac": 0.0,
"signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04304126277565956,
"signal/frontier_coverage_10/weight": 0.014299999922513962,
"signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002067515510134399,
"signal/frontier_coverage_15/centered_abs_mean": 0.09289034456014633,
"signal/frontier_coverage_15/group_std_mean": 0.11935219541192055,
"signal/frontier_coverage_15/group_zero_std_frac": 0.0,
"signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027649453841149807,
"signal/frontier_coverage_15/weight": 0.014299999922513962,
"signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001328331942204386,
"signal/frontier_coverage_20/centered_abs_mean": 0.05580424703657627,
"signal/frontier_coverage_20/group_std_mean": 0.07091843336820602,
"signal/frontier_coverage_20/group_zero_std_frac": 0.0,
"signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016613470390439034,
"signal/frontier_coverage_20/weight": 0.014299999922513962,
"signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007980007212609053,
"signal/frontier_coverage_25/centered_abs_mean": 0.05803663656115532,
"signal/frontier_coverage_25/group_std_mean": 0.07403568923473358,
"signal/frontier_coverage_25/group_zero_std_frac": 0.0,
"signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017281348817050457,
"signal/frontier_coverage_25/weight": 0.014299999922513962,
"signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008299238979816437,
"signal/frontier_coverage_5/centered_abs_mean": 0.14606471359729767,
"signal/frontier_coverage_5/group_std_mean": 0.18749448657035828,
"signal/frontier_coverage_5/group_zero_std_frac": 0.0,
"signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04348263330757618,
"signal/frontier_coverage_5/weight": 0.014299999922513962,
"signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002088725450448692,
"signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25206458568573,
"signal/frontier_entropy_batch_reward/group_std_mean": 0.326417937874794,
"signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0,
"signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5250041484832764,
"signal/frontier_entropy_batch_reward/weight": 0.10000000149011612,
"signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025206458754837513,
"step": 312,
"total_flos": 0.0,
"train_loss": -0.0004958666193907937,
"train_runtime": 60044.1605,
"train_samples_per_second": 0.333,
"train_steps_per_second": 0.005
}
],
"logging_steps": 5,
"max_steps": 312,
"num_input_tokens_seen": 1064347934,
"num_train_epochs": 1,
"save_steps": 60,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}