{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6339422860552735, "calibration/batch_distribution_entropy": 0.6446441729443816, "calibration/confidence_entropy": 0.3458862984235007, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4998652652500846, "calibration/mean_confidence": 0.790780918036414, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03681640625, "completions/max_length": 1490.6, "completions/max_terminated_length": 1490.6, "completions/mean_length": 215.999609375, "completions/mean_terminated_length": 224.2462951660156, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.12977811694145203, "learning_rate": 3.1249999999999997e-07, "loss": 0.017, "num_tokens": 17055868.0, "reward": 0.500128197669983, "reward_std": 0.3792427361011505, "rewards/accuracy_reward": 0.2193359375, "rewards/brier_reward": 0.3712968111038208, "rewards/confidence_uniqueness_reward": 0.4833169639110565, "rewards/format_reward": 0.6802734375, "rewards/frontier_coverage_0": 0.29783164262771605, "rewards/frontier_coverage_1": 0.29783164262771605, "rewards/frontier_coverage_10": 0.29783164262771605, "rewards/frontier_coverage_15": 0.29783164262771605, "rewards/frontier_coverage_20": 0.29783164262771605, "rewards/frontier_coverage_25": 0.29783164262771605, "rewards/frontier_coverage_5": 0.29783164262771605, "rewards/frontier_entropy_batch_reward": -0.649508249759674, "signal/accuracy_reward/centered_abs_mean": 0.24100341796875, "signal/accuracy_reward/group_std_mean": 0.2807555437088013, "signal/accuracy_reward/group_zero_std_frac": 0.33125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.34670318365097047, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.120501708984375, "signal/advantage_abs_mean": 0.8450873494148254, "signal/advantage_pre_scale_abs_mean": 0.32363836765289306, "signal/advantage_pre_scale_std": 0.3882134258747101, "signal/advantage_std": 0.9842045307159424, "signal/brier_reward/centered_abs_mean": 0.31905131340026854, "signal/brier_reward/group_std_mean": 0.36378265619277955, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09252839237451553, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03190513178706169, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2979742467403412, "signal/confidence_uniqueness_reward/group_std_mean": 0.3481938362121582, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08685077279806137, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029797424748539924, "signal/format_reward/centered_abs_mean": 0.40498046875, "signal/format_reward/group_std_mean": 0.4544346511363983, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5899764060974121, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.202490234375, "signal/frontier_coverage_0/centered_abs_mean": 0.29145163893699644, "signal/frontier_coverage_0/group_std_mean": 0.3415479838848114, "signal/frontier_coverage_0/group_zero_std_frac": 0.003125, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012060248106718064, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004167758347466588, "signal/frontier_coverage_1/centered_abs_mean": 0.29145163893699644, "signal/frontier_coverage_1/group_std_mean": 0.3415479838848114, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012060248106718064, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004167758347466588, "signal/frontier_coverage_10/centered_abs_mean": 0.29145163893699644, "signal/frontier_coverage_10/group_std_mean": 0.3415479838848114, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012060248106718064, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004167758347466588, "signal/frontier_coverage_15/centered_abs_mean": 0.29145163893699644, "signal/frontier_coverage_15/group_std_mean": 0.3415479838848114, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012060248106718064, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004167758347466588, "signal/frontier_coverage_20/centered_abs_mean": 0.29145163893699644, "signal/frontier_coverage_20/group_std_mean": 0.3415479838848114, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012060248106718064, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004167758347466588, "signal/frontier_coverage_25/centered_abs_mean": 0.29145163893699644, "signal/frontier_coverage_25/group_std_mean": 0.3415479838848114, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012060248106718064, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004167758347466588, "signal/frontier_coverage_5/centered_abs_mean": 0.29145163893699644, "signal/frontier_coverage_5/group_std_mean": 0.3415479838848114, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012060248106718064, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004167758347466588, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42818194031715395, "signal/frontier_entropy_batch_reward/group_std_mean": 0.47405582666397095, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12476505488157272, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0428181953728199, "step": 5 }, { "calibration/aurc": 0.6660608772094376, "calibration/batch_distribution_entropy": 0.6544913229610491, "calibration/confidence_entropy": 0.33948615354690503, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5208878541612636, "calibration/mean_confidence": 0.7934546409463656, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03876953125, "completions/max_length": 1502.0, "completions/max_terminated_length": 1502.0, "completions/mean_length": 202.78828125, "completions/mean_terminated_length": 210.9924285888672, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.032, "grad_norm": 0.13490329682826996, "learning_rate": 6.249999999999999e-07, "loss": 0.0069, "num_tokens": 34232772.0, "reward": 0.5240197837352752, "reward_std": 0.35646448731422425, "rewards/accuracy_reward": 0.21396484375, "rewards/brier_reward": 0.38541473150253297, "rewards/confidence_uniqueness_reward": 0.5249821066856384, "rewards/format_reward": 0.72978515625, "rewards/frontier_coverage_0": 0.3038561224937439, "rewards/frontier_coverage_1": 0.3038561224937439, "rewards/frontier_coverage_10": 0.3038561224937439, "rewards/frontier_coverage_15": 0.3038561224937439, "rewards/frontier_coverage_20": 0.3038561224937439, "rewards/frontier_coverage_25": 0.3038561224937439, "rewards/frontier_coverage_5": 0.3038561224937439, "rewards/frontier_entropy_batch_reward": -0.6931091666221618, "signal/accuracy_reward/centered_abs_mean": 0.223748779296875, "signal/accuracy_reward/group_std_mean": 0.26854496598243716, "signal/accuracy_reward/group_zero_std_frac": 0.33125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3456988275051117, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1118743896484375, "signal/advantage_abs_mean": 0.8104079842567444, "signal/advantage_pre_scale_abs_mean": 0.293930447101593, "signal/advantage_pre_scale_std": 0.3660040318965912, "signal/advantage_std": 0.9841862320899963, "signal/brier_reward/centered_abs_mean": 0.30312992334365846, "signal/brier_reward/group_std_mean": 0.3520526349544525, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09381168931722642, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030312991887331008, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.27632507085800173, "signal/confidence_uniqueness_reward/group_std_mean": 0.33531762957572936, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08543038666248322, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.027632508799433707, "signal/format_reward/centered_abs_mean": 0.362567138671875, "signal/format_reward/group_std_mean": 0.4284077942371368, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5599912583827973, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1812835693359375, "signal/frontier_coverage_0/centered_abs_mean": 0.27730215191841123, "signal/frontier_coverage_0/group_std_mean": 0.331596827507019, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012270637229084969, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003965420834720135, "signal/frontier_coverage_1/centered_abs_mean": 0.27730215191841123, "signal/frontier_coverage_1/group_std_mean": 0.331596827507019, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012270637229084969, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003965420834720135, "signal/frontier_coverage_10/centered_abs_mean": 0.27730215191841123, "signal/frontier_coverage_10/group_std_mean": 0.331596827507019, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012270637229084969, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003965420834720135, "signal/frontier_coverage_15/centered_abs_mean": 0.27730215191841123, "signal/frontier_coverage_15/group_std_mean": 0.331596827507019, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012270637229084969, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003965420834720135, "signal/frontier_coverage_20/centered_abs_mean": 0.27730215191841123, "signal/frontier_coverage_20/group_std_mean": 0.331596827507019, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012270637229084969, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003965420834720135, "signal/frontier_coverage_25/centered_abs_mean": 0.27730215191841123, "signal/frontier_coverage_25/group_std_mean": 0.331596827507019, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012270637229084969, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003965420834720135, "signal/frontier_coverage_5/centered_abs_mean": 0.27730215191841123, "signal/frontier_coverage_5/group_std_mean": 0.331596827507019, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012270637229084969, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003965420834720135, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39430789947509765, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4540126621723175, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12191563993692398, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0394307903945446, "step": 10 }, { "calibration/aurc": 0.5802905987854532, "calibration/batch_distribution_entropy": 0.6451810079102698, "calibration/buffer_distribution_entropy": 0.6653544222673999, "calibration/confidence_entropy": 0.34505604458967376, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4489973757469999, "calibration/mean_confidence": 0.8003520858627509, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0158203125, "completions/max_length": 1489.8, "completions/max_terminated_length": 1489.8, "completions/mean_length": 167.04140625, "completions/mean_terminated_length": 169.87072143554687, "completions/min_length": 0.0, "completions/min_terminated_length": 14.8, "epoch": 0.048, "grad_norm": 0.06792226433753967, "learning_rate": 9.374999999999999e-07, "loss": 0.0051, "num_tokens": 50992012.0, "reward": 0.6604457139968872, "reward_std": 0.26606449782848357, "rewards/accuracy_reward": 0.290234375, "rewards/brier_reward": 0.5046543598175048, "rewards/confidence_uniqueness_reward": 0.6614177465438843, "rewards/format_reward": 0.9013671875, "rewards/frontier_coverage_0": 0.3303596466779709, "rewards/frontier_coverage_1": 0.3303596466779709, "rewards/frontier_coverage_10": 0.3303596466779709, "rewards/frontier_coverage_15": 0.3303596466779709, "rewards/frontier_coverage_20": 0.3303596466779709, "rewards/frontier_coverage_25": 0.3303596466779709, "rewards/frontier_coverage_5": 0.3303596466779709, "rewards/frontier_entropy_batch_reward": -0.8503130555152894, "signal/accuracy_reward/centered_abs_mean": 0.1984130859375, "signal/accuracy_reward/group_std_mean": 0.24557192921638488, "signal/accuracy_reward/group_zero_std_frac": 0.365625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.4712305724620819, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09920654296875, "signal/advantage_abs_mean": 0.7059193015098572, "signal/advantage_pre_scale_abs_mean": 0.19891976118087767, "signal/advantage_pre_scale_std": 0.2803692609071732, "signal/advantage_std": 0.9840129852294922, "signal/brier_reward/centered_abs_mean": 0.26394283175468447, "signal/brier_reward/group_std_mean": 0.3196195185184479, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12516716569662095, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02639428377151489, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1863771289587021, "signal/confidence_uniqueness_reward/group_std_mean": 0.24599670469760895, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08663659989833832, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01863771304488182, "signal/format_reward/centered_abs_mean": 0.16243896484375, "signal/format_reward/group_std_mean": 0.25803537368774415, "signal/format_reward/group_zero_std_frac": 0.1375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.34426852166652677, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.081219482421875, "signal/frontier_coverage_0/centered_abs_mean": 0.23436213433742523, "signal/frontier_coverage_0/group_std_mean": 0.2903676062822342, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01507992073893547, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033513785572722556, "signal/frontier_coverage_1/centered_abs_mean": 0.23436213433742523, "signal/frontier_coverage_1/group_std_mean": 0.2903676062822342, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01507992073893547, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033513785572722556, "signal/frontier_coverage_10/centered_abs_mean": 0.23436213433742523, "signal/frontier_coverage_10/group_std_mean": 0.2903676062822342, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01507992073893547, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033513785572722556, "signal/frontier_coverage_15/centered_abs_mean": 0.23436213433742523, "signal/frontier_coverage_15/group_std_mean": 0.2903676062822342, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01507992073893547, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0033513785572722556, "signal/frontier_coverage_20/centered_abs_mean": 0.23436213433742523, "signal/frontier_coverage_20/group_std_mean": 0.2903676062822342, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01507992073893547, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0033513785572722556, "signal/frontier_coverage_25/centered_abs_mean": 0.23436213433742523, "signal/frontier_coverage_25/group_std_mean": 0.2903676062822342, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01507992073893547, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0033513785572722556, "signal/frontier_coverage_5/centered_abs_mean": 0.23436213433742523, "signal/frontier_coverage_5/group_std_mean": 0.2903676062822342, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01507992073893547, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033513785572722556, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2380182147026062, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3486140549182892, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0375, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10810260623693466, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02380182184278965, "step": 15 }, { "calibration/aurc": 0.5322016670746652, "calibration/batch_distribution_entropy": 0.7705823528313185, "calibration/buffer_distribution_entropy": 0.6686908598810465, "calibration/confidence_entropy": 0.4170525519014621, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.32542858900510585, "calibration/mean_confidence": 0.7265149808992029, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0044921875, "completions/max_length": 1205.0, "completions/max_terminated_length": 1205.0, "completions/mean_length": 127.00986328125, "completions/mean_terminated_length": 127.59269409179687, "completions/min_length": 0.0, "completions/min_terminated_length": 25.0, "epoch": 0.064, "grad_norm": 0.13832873106002808, "learning_rate": 1e-06, "loss": -0.0098, "num_tokens": 67210993.0, "reward": 0.7248212337493897, "reward_std": 0.17984383106231688, "rewards/accuracy_reward": 0.3447265625, "rewards/brier_reward": 0.610313105583191, "rewards/confidence_uniqueness_reward": 0.7996426224708557, "rewards/format_reward": 0.9826171875, "rewards/frontier_coverage_0": 0.07571766301989555, "rewards/frontier_coverage_1": 0.07571766301989555, "rewards/frontier_coverage_10": 0.07571766301989555, "rewards/frontier_coverage_15": 0.07571766301989555, "rewards/frontier_coverage_20": 0.07571766301989555, "rewards/frontier_coverage_25": 0.07571766301989555, "rewards/frontier_coverage_5": 0.07571766301989555, "rewards/frontier_entropy_batch_reward": -0.8742559432983399, "signal/accuracy_reward/centered_abs_mean": 0.193359375, "signal/accuracy_reward/group_std_mean": 0.24748140275478364, "signal/accuracy_reward/group_zero_std_frac": 0.328125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7142280459403991, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0966796875, "signal/advantage_abs_mean": 0.7126465678215027, "signal/advantage_pre_scale_abs_mean": 0.1337364584207535, "signal/advantage_pre_scale_std": 0.1942312479019165, "signal/advantage_std": 0.9837595462799072, "signal/brier_reward/centered_abs_mean": 0.23327789902687074, "signal/brier_reward/group_std_mean": 0.28798535466194153, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17303505837917327, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023327790945768357, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09244537949562073, "signal/confidence_uniqueness_reward/group_std_mean": 0.13309455364942552, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06808455139398575, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009244537819176913, "signal/format_reward/centered_abs_mean": 0.0329345703125, "signal/format_reward/group_std_mean": 0.08150013014674187, "signal/format_reward/group_zero_std_frac": 0.59375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11886298581957817, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01646728515625, "signal/frontier_coverage_0/centered_abs_mean": 0.13106433302164078, "signal/frontier_coverage_0/group_std_mean": 0.19403213858604432, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.014053609594702721, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001874219998717308, "signal/frontier_coverage_1/centered_abs_mean": 0.13106433302164078, "signal/frontier_coverage_1/group_std_mean": 0.19403213858604432, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.014053609594702721, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001874219998717308, "signal/frontier_coverage_10/centered_abs_mean": 0.13106433302164078, "signal/frontier_coverage_10/group_std_mean": 0.19403213858604432, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014053609594702721, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001874219998717308, "signal/frontier_coverage_15/centered_abs_mean": 0.13106433302164078, "signal/frontier_coverage_15/group_std_mean": 0.19403213858604432, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014053609594702721, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001874219998717308, "signal/frontier_coverage_20/centered_abs_mean": 0.13106433302164078, "signal/frontier_coverage_20/group_std_mean": 0.19403213858604432, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014053609594702721, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001874219998717308, "signal/frontier_coverage_25/centered_abs_mean": 0.13106433302164078, "signal/frontier_coverage_25/group_std_mean": 0.19403213858604432, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014053609594702721, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001874219998717308, "signal/frontier_coverage_5/centered_abs_mean": 0.13106433302164078, "signal/frontier_coverage_5/group_std_mean": 0.19403213858604432, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.014053609594702721, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001874219998717308, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20934711396694183, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3334401249885559, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.05, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.1583297297358513, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.020934711396694183, "step": 20 }, { "calibration/aurc": 0.6791233532497524, "calibration/batch_distribution_entropy": 0.9538421546715095, "calibration/buffer_distribution_entropy": 0.7588070364798357, "calibration/confidence_entropy": 0.5099153307060644, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.001171875, "calibration/coverage@15%": 0.001171875, "calibration/coverage@20%": 0.001171875, "calibration/coverage@25%": 0.0015625, "calibration/coverage@30%": 0.0015625, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.27117672410782256, "calibration/mean_confidence": 0.47369882184296996, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00244140625, "completions/max_length": 995.0, "completions/max_terminated_length": 995.0, "completions/mean_length": 104.07744140625, "completions/mean_terminated_length": 104.32813873291016, "completions/min_length": 0.0, "completions/min_terminated_length": 33.8, "epoch": 0.08, "grad_norm": 0.060792941600084305, "learning_rate": 1e-06, "loss": -0.0061, "num_tokens": 83209898.0, "reward": 0.7957952499389649, "reward_std": 0.1482664555311203, "rewards/accuracy_reward": 0.34365234375, "rewards/brier_reward": 0.7017360806465149, "rewards/confidence_uniqueness_reward": 0.9248051285743714, "rewards/format_reward": 0.9947265625, "rewards/frontier_coverage_0": 0.13328106552362443, "rewards/frontier_coverage_1": 0.13328106552362443, "rewards/frontier_coverage_10": 0.13328106552362443, "rewards/frontier_coverage_15": 0.13328106552362443, "rewards/frontier_coverage_20": 0.13328106552362443, "rewards/frontier_coverage_25": 0.13328106552362443, "rewards/frontier_coverage_5": 0.13328106552362443, "rewards/frontier_entropy_batch_reward": -0.4938975155353546, "signal/accuracy_reward/centered_abs_mean": 0.192559814453125, "signal/accuracy_reward/group_std_mean": 0.2381644457578659, "signal/accuracy_reward/group_zero_std_frac": 0.384375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.871105182170868, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0962799072265625, "signal/advantage_abs_mean": 0.769687807559967, "signal/advantage_pre_scale_abs_mean": 0.11523678749799729, "signal/advantage_pre_scale_std": 0.1617472231388092, "signal/advantage_std": 0.983604621887207, "signal/brier_reward/centered_abs_mean": 0.21707654893398284, "signal/brier_reward/group_std_mean": 0.2683330178260803, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19545693695545197, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02170765623450279, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04447389058768749, "signal/confidence_uniqueness_reward/group_std_mean": 0.06878753378987312, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03905631639063358, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004447389068081975, "signal/format_reward/centered_abs_mean": 0.01019287109375, "signal/format_reward/group_std_mean": 0.02915844917297363, "signal/format_reward/group_zero_std_frac": 0.8375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04567938521504402, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005096435546875, "signal/frontier_coverage_0/centered_abs_mean": 0.2647977530956268, "signal/frontier_coverage_0/group_std_mean": 0.3346329748630524, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03465293869376183, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037866079248487948, "signal/frontier_coverage_1/centered_abs_mean": 0.2647977530956268, "signal/frontier_coverage_1/group_std_mean": 0.3346329748630524, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03465293869376183, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037866079248487948, "signal/frontier_coverage_10/centered_abs_mean": 0.2647977530956268, "signal/frontier_coverage_10/group_std_mean": 0.3346329748630524, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03465293869376183, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037866079248487948, "signal/frontier_coverage_15/centered_abs_mean": 0.2647977530956268, "signal/frontier_coverage_15/group_std_mean": 0.3346329748630524, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03465293869376183, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037866079248487948, "signal/frontier_coverage_20/centered_abs_mean": 0.2647977530956268, "signal/frontier_coverage_20/group_std_mean": 0.3346329748630524, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03465293869376183, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0037866079248487948, "signal/frontier_coverage_25/centered_abs_mean": 0.2647977530956268, "signal/frontier_coverage_25/group_std_mean": 0.3346329748630524, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03465293869376183, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037866079248487948, "signal/frontier_coverage_5/centered_abs_mean": 0.2647977530956268, "signal/frontier_coverage_5/group_std_mean": 0.3346329748630524, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03465293869376183, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037866079248487948, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.44679933190345766, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5179579973220825, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40088812708854676, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044679935276508334, "step": 25 }, { "calibration/aurc": 0.6576899380734184, "calibration/batch_distribution_entropy": 0.8272113669921964, "calibration/buffer_distribution_entropy": 0.8702624531424125, "calibration/confidence_entropy": 0.44559956564740794, "calibration/coverage@0%": 0.000390625, "calibration/coverage@1%": 0.000390625, "calibration/coverage@10%": 0.000390625, "calibration/coverage@15%": 0.000390625, "calibration/coverage@20%": 0.000390625, "calibration/coverage@25%": 0.000390625, "calibration/coverage@30%": 0.000390625, "calibration/coverage@5%": 0.000390625, "calibration/ece": 0.17525078401977626, "calibration/mean_confidence": 0.2572908789952067, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002734375, "completions/max_length": 1001.0, "completions/max_terminated_length": 1001.0, "completions/mean_length": 96.93759765625, "completions/mean_terminated_length": 97.20596618652344, "completions/min_length": 0.0, "completions/min_terminated_length": 37.6, "epoch": 0.096, "grad_norm": 0.03620428219437599, "learning_rate": 1e-06, "loss": -0.0134, "num_tokens": 99247147.0, "reward": 0.7996835231781005, "reward_std": 0.1173510953783989, "rewards/accuracy_reward": 0.34423828125, "rewards/brier_reward": 0.7268486857414246, "rewards/confidence_uniqueness_reward": 0.9211776852607727, "rewards/format_reward": 0.9951171875, "rewards/frontier_coverage_0": 0.1958144187927246, "rewards/frontier_coverage_1": 0.1958144187927246, "rewards/frontier_coverage_10": 0.1958144187927246, "rewards/frontier_coverage_15": 0.1958144187927246, "rewards/frontier_coverage_20": 0.1958144187927246, "rewards/frontier_coverage_25": 0.1958144187927246, "rewards/frontier_coverage_5": 0.1958144187927246, "rewards/frontier_entropy_batch_reward": -0.5439786970615387, "signal/accuracy_reward/centered_abs_mean": 0.184832763671875, "signal/accuracy_reward/group_std_mean": 0.2294948309659958, "signal/accuracy_reward/group_zero_std_frac": 0.396875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.3462110042572022, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0924163818359375, "signal/advantage_abs_mean": 0.7473811984062195, "signal/advantage_pre_scale_abs_mean": 0.08806595504283905, "signal/advantage_pre_scale_std": 0.13366734385490417, "signal/advantage_std": 0.9830494165420532, "signal/brier_reward/centered_abs_mean": 0.19736847281455994, "signal/brier_reward/group_std_mean": 0.24908939003944397, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.288382089138031, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019736847281455992, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03614585101604462, "signal/confidence_uniqueness_reward/group_std_mean": 0.05685581639409065, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.053042204678058626, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003614585101604462, "signal/format_reward/centered_abs_mean": 0.0093994140625, "signal/format_reward/group_std_mean": 0.02593981511890888, "signal/format_reward/group_zero_std_frac": 0.859375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0692012570798397, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00469970703125, "signal/frontier_coverage_0/centered_abs_mean": 0.3461109459400177, "signal/frontier_coverage_0/group_std_mean": 0.42214337587356565, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.07239173352718353, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0049493865109980105, "signal/frontier_coverage_1/centered_abs_mean": 0.3461109459400177, "signal/frontier_coverage_1/group_std_mean": 0.42214337587356565, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.07239173352718353, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0049493865109980105, "signal/frontier_coverage_10/centered_abs_mean": 0.3461109459400177, "signal/frontier_coverage_10/group_std_mean": 0.42214337587356565, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.07239173352718353, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0049493865109980105, "signal/frontier_coverage_15/centered_abs_mean": 0.3461109459400177, "signal/frontier_coverage_15/group_std_mean": 0.42214337587356565, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.07239173352718353, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0049493865109980105, "signal/frontier_coverage_20/centered_abs_mean": 0.3461109459400177, "signal/frontier_coverage_20/group_std_mean": 0.42214337587356565, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.07239173352718353, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0049493865109980105, "signal/frontier_coverage_25/centered_abs_mean": 0.3461109459400177, "signal/frontier_coverage_25/group_std_mean": 0.42214337587356565, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.07239173352718353, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0049493865109980105, "signal/frontier_coverage_5/centered_abs_mean": 0.3461109459400177, "signal/frontier_coverage_5/group_std_mean": 0.42214337587356565, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.07239173352718353, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0049493865109980105, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4473235845565796, "signal/frontier_entropy_batch_reward/group_std_mean": 0.5099922716617584, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6549581050872803, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.044732359051704404, "step": 30 }, { "calibration/aurc": 0.5592464061427573, "calibration/batch_distribution_entropy": 0.9734186029823684, "calibration/buffer_distribution_entropy": 0.9249716187963466, "calibration/confidence_entropy": 0.5295920857549896, "calibration/coverage@0%": 0.00078125, "calibration/coverage@1%": 0.00078125, "calibration/coverage@10%": 0.00078125, "calibration/coverage@15%": 0.00078125, "calibration/coverage@20%": 0.001953125, "calibration/coverage@25%": 0.0035217524509803923, "calibration/coverage@30%": 0.005474877450980392, "calibration/coverage@5%": 0.00078125, "calibration/ece": 0.229060973623976, "calibration/mean_confidence": 0.43376112419377993, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 663.4, "completions/max_terminated_length": 663.4, "completions/mean_length": 94.01142578125, "completions/mean_terminated_length": 94.13132019042969, "completions/min_length": 7.6, "completions/min_terminated_length": 40.0, "epoch": 0.112, "grad_norm": 0.016059977933764458, "learning_rate": 1e-06, "loss": -0.0048, "num_tokens": 115319296.0, "reward": 0.855149245262146, "reward_std": 0.13141493052244185, "rewards/accuracy_reward": 0.41474609375, "rewards/brier_reward": 0.6991081237792969, "rewards/confidence_uniqueness_reward": 0.9526235222816467, "rewards/format_reward": 0.99775390625, "rewards/frontier_coverage_0": 0.07727691400796174, "rewards/frontier_coverage_1": 0.07727691400796174, "rewards/frontier_coverage_10": 0.07727691400796174, "rewards/frontier_coverage_15": 0.07727691400796174, "rewards/frontier_coverage_20": 0.07727691400796174, "rewards/frontier_coverage_25": 0.07727691400796174, "rewards/frontier_coverage_5": 0.07727691400796174, "rewards/frontier_entropy_batch_reward": -0.24009357690811156, "signal/accuracy_reward/centered_abs_mean": 0.181622314453125, "signal/accuracy_reward/group_std_mean": 0.23383015990257264, "signal/accuracy_reward/group_zero_std_frac": 0.353125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9329653382301331, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0908111572265625, "signal/advantage_abs_mean": 0.7803734302520752, "signal/advantage_pre_scale_abs_mean": 0.10330383628606796, "signal/advantage_pre_scale_std": 0.14460791051387786, "signal/advantage_std": 0.9834746479988098, "signal/brier_reward/centered_abs_mean": 0.21031469106674194, "signal/brier_reward/group_std_mean": 0.2588426500558853, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.217045795917511, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021031468734145166, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01818895637989044, "signal/confidence_uniqueness_reward/group_std_mean": 0.029530685395002365, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01874598637223244, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018188956892117858, "signal/format_reward/centered_abs_mean": 0.004351806640625, "signal/format_reward/group_std_mean": 0.012705824710428715, "signal/format_reward/group_zero_std_frac": 0.928125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.022282978147268297, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021759033203125, "signal/frontier_coverage_0/centered_abs_mean": 0.2851457536220551, "signal/frontier_coverage_0/group_std_mean": 0.3561969459056854, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.042065325379371646, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00407758429646492, "signal/frontier_coverage_1/centered_abs_mean": 0.2851457536220551, "signal/frontier_coverage_1/group_std_mean": 0.3561969459056854, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.042065325379371646, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00407758429646492, "signal/frontier_coverage_10/centered_abs_mean": 0.2851457536220551, "signal/frontier_coverage_10/group_std_mean": 0.3561969459056854, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.042065325379371646, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00407758429646492, "signal/frontier_coverage_15/centered_abs_mean": 0.2851457536220551, "signal/frontier_coverage_15/group_std_mean": 0.3561969459056854, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.042065325379371646, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00407758429646492, "signal/frontier_coverage_20/centered_abs_mean": 0.2851457536220551, "signal/frontier_coverage_20/group_std_mean": 0.3561969459056854, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.042065325379371646, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00407758429646492, "signal/frontier_coverage_25/centered_abs_mean": 0.2851457536220551, "signal/frontier_coverage_25/group_std_mean": 0.3561969459056854, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.042065325379371646, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00407758429646492, "signal/frontier_coverage_5/centered_abs_mean": 0.2851457536220551, "signal/frontier_coverage_5/group_std_mean": 0.3561969459056854, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.042065325379371646, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00407758429646492, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3295664429664612, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4042933166027069, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.34022077918052673, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032956644892692566, "step": 35 }, { "calibration/aurc": 0.5485888003105004, "calibration/batch_distribution_entropy": 0.9583806014639318, "calibration/buffer_distribution_entropy": 0.9500481098032335, "calibration/confidence_entropy": 0.4826562493346168, "calibration/coverage@0%": 0.001960016883465715, "calibration/coverage@1%": 0.001960016883465715, "calibration/coverage@10%": 0.004312958059936303, "calibration/coverage@15%": 0.004312958059936303, "calibration/coverage@20%": 0.004312958059936303, "calibration/coverage@25%": 0.00822532951536779, "calibration/coverage@30%": 0.01018611382909328, "calibration/coverage@5%": 0.001960016883465715, "calibration/ece": 0.2487835388596178, "calibration/mean_confidence": 0.37875214633636106, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 681.4, "completions/max_terminated_length": 681.4, "completions/mean_length": 98.33955078125, "completions/mean_terminated_length": 98.4450439453125, "completions/min_length": 8.0, "completions/min_terminated_length": 41.2, "epoch": 0.128, "grad_norm": 0.013107044622302055, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 131242965.0, "reward": 0.857842743396759, "reward_std": 0.11681393682956695, "rewards/accuracy_reward": 0.4255859375, "rewards/brier_reward": 0.6927406430244446, "rewards/confidence_uniqueness_reward": 0.9548697948455811, "rewards/format_reward": 0.99853515625, "rewards/frontier_coverage_0": 0.08065508380532264, "rewards/frontier_coverage_1": 0.08065508380532264, "rewards/frontier_coverage_10": 0.08065508380532264, "rewards/frontier_coverage_15": 0.08065508380532264, "rewards/frontier_coverage_20": 0.08065508380532264, "rewards/frontier_coverage_25": 0.08065508380532264, "rewards/frontier_coverage_5": 0.08065508380532264, "rewards/frontier_entropy_batch_reward": -0.2705242335796356, "signal/accuracy_reward/centered_abs_mean": 0.1486083984375, "signal/accuracy_reward/group_std_mean": 0.19626019299030303, "signal/accuracy_reward/group_zero_std_frac": 0.44375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8739246845245361, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07430419921875, "signal/advantage_abs_mean": 0.7811145305633544, "signal/advantage_pre_scale_abs_mean": 0.09206330478191375, "signal/advantage_pre_scale_std": 0.13003001511096954, "signal/advantage_std": 0.9833110690116882, "signal/brier_reward/centered_abs_mean": 0.22610692977905272, "signal/brier_reward/group_std_mean": 0.2758404791355133, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.27061591744422914, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02261069305241108, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021954387053847314, "signal/confidence_uniqueness_reward/group_std_mean": 0.030994601547718048, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026323718205094337, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002195438789203763, "signal/format_reward/centered_abs_mean": 0.002801513671875, "signal/format_reward/group_std_mean": 0.0072774821892380714, "signal/format_reward/group_zero_std_frac": 0.9625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01556429360061884, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014007568359375, "signal/frontier_coverage_0/centered_abs_mean": 0.30873937606811525, "signal/frontier_coverage_0/group_std_mean": 0.37858131527900696, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.05287817344069481, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00441497303545475, "signal/frontier_coverage_1/centered_abs_mean": 0.30873937606811525, "signal/frontier_coverage_1/group_std_mean": 0.37858131527900696, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.05287817344069481, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00441497303545475, "signal/frontier_coverage_10/centered_abs_mean": 0.30873937606811525, "signal/frontier_coverage_10/group_std_mean": 0.37858131527900696, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.05287817344069481, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00441497303545475, "signal/frontier_coverage_15/centered_abs_mean": 0.30873937606811525, "signal/frontier_coverage_15/group_std_mean": 0.37858131527900696, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.05287817344069481, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00441497303545475, "signal/frontier_coverage_20/centered_abs_mean": 0.30873937606811525, "signal/frontier_coverage_20/group_std_mean": 0.37858131527900696, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.05287817344069481, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00441497303545475, "signal/frontier_coverage_25/centered_abs_mean": 0.30873937606811525, "signal/frontier_coverage_25/group_std_mean": 0.37858131527900696, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.05287817344069481, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00441497303545475, "signal/frontier_coverage_5/centered_abs_mean": 0.30873937606811525, "signal/frontier_coverage_5/group_std_mean": 0.37858131527900696, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.05287817344069481, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00441497303545475, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3557403266429901, "signal/frontier_entropy_batch_reward/group_std_mean": 0.426521098613739, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42853416204452516, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035574034601449964, "step": 40 }, { "calibration/aurc": 0.40409018226935123, "calibration/batch_distribution_entropy": 0.975256420237604, "calibration/buffer_distribution_entropy": 0.9636264971133013, "calibration/confidence_entropy": 0.5030471631787824, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.001171875, "calibration/coverage@15%": 0.001171875, "calibration/coverage@20%": 0.13203583659491194, "calibration/coverage@25%": 0.20586931262230918, "calibration/coverage@30%": 0.20586931262230918, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.2794678515618836, "calibration/mean_confidence": 0.42314918125267065, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 686.0, "completions/max_terminated_length": 686.0, "completions/mean_length": 102.1638671875, "completions/mean_terminated_length": 102.22365875244141, "completions/min_length": 3.0, "completions/min_terminated_length": 38.0, "epoch": 0.144, "grad_norm": 0.023353978991508484, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 147239555.0, "reward": 0.9046274423599243, "reward_std": 0.12026365250349044, "rewards/accuracy_reward": 0.53173828125, "rewards/brier_reward": 0.678732717037201, "rewards/confidence_uniqueness_reward": 0.9554983496665954, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": -0.005397527106106281, "rewards/frontier_coverage_1": -0.005397527106106281, "rewards/frontier_coverage_10": -0.005397527106106281, "rewards/frontier_coverage_15": -0.005397527106106281, "rewards/frontier_coverage_20": -0.005397527106106281, "rewards/frontier_coverage_25": -0.005397527106106281, "rewards/frontier_coverage_5": -0.005397527106106281, "rewards/frontier_entropy_batch_reward": -0.23685037195682526, "signal/accuracy_reward/centered_abs_mean": 0.150152587890625, "signal/accuracy_reward/group_std_mean": 0.19809286296367645, "signal/accuracy_reward/group_zero_std_frac": 0.4375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.830261766910553, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0750762939453125, "signal/advantage_abs_mean": 0.7871874570846558, "signal/advantage_pre_scale_abs_mean": 0.09534858167171478, "signal/advantage_pre_scale_std": 0.13183027058839797, "signal/advantage_std": 0.9833981156349182, "signal/brier_reward/centered_abs_mean": 0.23030579090118408, "signal/brier_reward/group_std_mean": 0.2796397864818573, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2559266179800034, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02303057983517647, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020615693554282187, "signal/confidence_uniqueness_reward/group_std_mean": 0.02922433577477932, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02314589861780405, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002061569388024509, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009363159909844399, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_coverage_0/centered_abs_mean": 0.3010148942470551, "signal/frontier_coverage_0/group_std_mean": 0.37279834151268004, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04789614900946617, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004304513148963451, "signal/frontier_coverage_1/centered_abs_mean": 0.3010148942470551, "signal/frontier_coverage_1/group_std_mean": 0.37279834151268004, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04789614900946617, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004304513148963451, "signal/frontier_coverage_10/centered_abs_mean": 0.3010148942470551, "signal/frontier_coverage_10/group_std_mean": 0.37279834151268004, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04789614900946617, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004304513148963451, "signal/frontier_coverage_15/centered_abs_mean": 0.3010148942470551, "signal/frontier_coverage_15/group_std_mean": 0.37279834151268004, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04789614900946617, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004304513148963451, "signal/frontier_coverage_20/centered_abs_mean": 0.3010148942470551, "signal/frontier_coverage_20/group_std_mean": 0.37279834151268004, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04789614900946617, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004304513148963451, "signal/frontier_coverage_25/centered_abs_mean": 0.3010148942470551, "signal/frontier_coverage_25/group_std_mean": 0.37279834151268004, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04789614900946617, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004304513148963451, "signal/frontier_coverage_5/centered_abs_mean": 0.3010148942470551, "signal/frontier_coverage_5/group_std_mean": 0.37279834151268004, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04789614900946617, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004304513148963451, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3241081744432449, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3993211805820465, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3629483371973038, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03241081647574902, "step": 45 }, { "calibration/aurc": 0.47796316607726597, "calibration/batch_distribution_entropy": 0.9817986151177764, "calibration/buffer_distribution_entropy": 0.9728652858416347, "calibration/confidence_entropy": 0.530073439985547, "calibration/coverage@0%": 0.001953125, "calibration/coverage@1%": 0.001953125, "calibration/coverage@10%": 0.001953125, "calibration/coverage@15%": 0.001953125, "calibration/coverage@20%": 0.008988197162426614, "calibration/coverage@25%": 0.008988197162426614, "calibration/coverage@30%": 0.012113197162426615, "calibration/coverage@5%": 0.001953125, "calibration/ece": 0.19193092135956252, "calibration/mean_confidence": 0.5419844403512751, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 804.8, "completions/max_terminated_length": 804.8, "completions/mean_length": 111.23623046875, "completions/mean_terminated_length": 111.38824768066407, "completions/min_length": 0.0, "completions/min_terminated_length": 46.4, "epoch": 0.16, "grad_norm": 0.02443806827068329, "learning_rate": 1e-06, "loss": -0.0073, "num_tokens": 163399542.0, "reward": 0.883971381187439, "reward_std": 0.12701284289360046, "rewards/accuracy_reward": 0.4640625, "rewards/brier_reward": 0.705234956741333, "rewards/confidence_uniqueness_reward": 0.955999755859375, "rewards/format_reward": 0.9986328125, "rewards/frontier_coverage_0": 0.048603178933262825, "rewards/frontier_coverage_1": 0.048603178933262825, "rewards/frontier_coverage_10": 0.048603178933262825, "rewards/frontier_coverage_15": 0.048603178933262825, "rewards/frontier_coverage_20": 0.048603178933262825, "rewards/frontier_coverage_25": 0.048603178933262825, "rewards/frontier_coverage_5": 0.048603178933262825, "rewards/frontier_entropy_batch_reward": -0.18364944458007812, "signal/accuracy_reward/centered_abs_mean": 0.14886474609375, "signal/accuracy_reward/group_std_mean": 0.19415634870529175, "signal/accuracy_reward/group_zero_std_frac": 0.4625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.781326687335968, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.074432373046875, "signal/advantage_abs_mean": 0.7767141819000244, "signal/advantage_pre_scale_abs_mean": 0.09897643923759461, "signal/advantage_pre_scale_std": 0.14180286526679992, "signal/advantage_std": 0.983447003364563, "signal/brier_reward/centered_abs_mean": 0.21131813228130342, "signal/brier_reward/group_std_mean": 0.2597563862800598, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2243928611278534, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021131813526153564, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014774037152528762, "signal/confidence_uniqueness_reward/group_std_mean": 0.022573205083608626, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01570458896458149, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001477403729222715, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_std_mean": 0.007733980193734169, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014173118397593498, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_coverage_0/centered_abs_mean": 0.22484306693077089, "signal/frontier_coverage_0/group_std_mean": 0.29105273485183714, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03399848416447639, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032152560073882342, "signal/frontier_coverage_1/centered_abs_mean": 0.22484306693077089, "signal/frontier_coverage_1/group_std_mean": 0.29105273485183714, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03399848416447639, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032152560073882342, "signal/frontier_coverage_10/centered_abs_mean": 0.22484306693077089, "signal/frontier_coverage_10/group_std_mean": 0.29105273485183714, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03399848416447639, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032152560073882342, "signal/frontier_coverage_15/centered_abs_mean": 0.22484306693077089, "signal/frontier_coverage_15/group_std_mean": 0.29105273485183714, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03399848416447639, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032152560073882342, "signal/frontier_coverage_20/centered_abs_mean": 0.22484306693077089, "signal/frontier_coverage_20/group_std_mean": 0.29105273485183714, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03399848416447639, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0032152560073882342, "signal/frontier_coverage_25/centered_abs_mean": 0.22484306693077089, "signal/frontier_coverage_25/group_std_mean": 0.29105273485183714, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03399848416447639, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032152560073882342, "signal/frontier_coverage_5/centered_abs_mean": 0.22484306693077089, "signal/frontier_coverage_5/group_std_mean": 0.29105273485183714, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03399848416447639, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032152560073882342, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27477757930755614, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35714380741119384, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.29268457293510436, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02747775800526142, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.5952072642536581, "eval_calibration/batch_distribution_entropy": 0.9297086608557112, "eval_calibration/buffer_distribution_entropy": 0.9766844148452357, "eval_calibration/confidence_entropy": 0.5220518976892288, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.03125, "eval_calibration/coverage@15%": 0.03125, "eval_calibration/coverage@20%": 0.03125, "eval_calibration/coverage@25%": 0.0546875, "eval_calibration/coverage@30%": 0.09375, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.2956597683100823, "eval_calibration/mean_confidence": 0.5218693570576916, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 282.25, "eval_completions/max_terminated_length": 282.25, "eval_completions/mean_length": 110.61812973022461, "eval_completions/mean_terminated_length": 110.61812973022461, "eval_completions/min_length": 54.5, "eval_completions/min_terminated_length": 54.5, "eval_loss": 0.0, "eval_num_tokens": 163399542.0, "eval_reward": 0.7699112445116043, "eval_reward_std": 0.22320134565234184, "eval_rewards/accuracy_reward": 0.396484375, "eval_rewards/brier_reward": 0.7137555778026581, "eval_rewards/confidence_uniqueness_reward": 0.896240234375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.10658804513514042, "eval_rewards/frontier_coverage_1": 0.10658804513514042, "eval_rewards/frontier_coverage_10": 0.10658804513514042, "eval_rewards/frontier_coverage_15": 0.10658804513514042, "eval_rewards/frontier_coverage_20": 0.10658804513514042, "eval_rewards/frontier_coverage_25": 0.10658804513514042, "eval_rewards/frontier_coverage_5": 0.10658804513514042, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 16.3101, "eval_samples_per_second": 30.656, "eval_signal/accuracy_reward/centered_abs_mean": 0.4571533203125, "eval_signal/accuracy_reward/group_std_mean": 0.4844451770186424, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0287865847349167, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22857666015625, "eval_signal/advantage_abs_mean": 0.9094541817903519, "eval_signal/advantage_pre_scale_abs_mean": 0.2041856087744236, "eval_signal/advantage_pre_scale_std": 0.22104624286293983, "eval_signal/advantage_std": 0.9876727759838104, "eval_signal/brier_reward/centered_abs_mean": 0.22672728821635246, "eval_signal/brier_reward/group_std_mean": 0.278080090880394, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.10234775766730309, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022672730032354593, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0434112548828125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.052254452370107174, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019623446743935347, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004341125721111894, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.30074895173311234, "eval_signal/frontier_coverage_0/group_std_mean": 0.4004068300127983, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01945252064615488, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004300709872040898, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.30074895173311234, "eval_signal/frontier_coverage_1/group_std_mean": 0.4004068300127983, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01945252064615488, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004300709872040898, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.30074895173311234, "eval_signal/frontier_coverage_10/group_std_mean": 0.4004068300127983, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01945252064615488, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004300709872040898, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.30074895173311234, "eval_signal/frontier_coverage_15/group_std_mean": 0.4004068300127983, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01945252064615488, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004300709872040898, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.30074895173311234, "eval_signal/frontier_coverage_20/group_std_mean": 0.4004068300127983, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01945252064615488, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004300709872040898, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.30074895173311234, "eval_signal/frontier_coverage_25/group_std_mean": 0.4004068300127983, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01945252064615488, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004300709872040898, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.30074895173311234, "eval_signal/frontier_coverage_5/group_std_mean": 0.4004068300127983, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01945252064615488, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004300709872040898, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.245, "step": 50 }, { "calibration/aurc": 0.4293310472623869, "calibration/batch_distribution_entropy": 0.991187044559398, "calibration/buffer_distribution_entropy": 0.9787306580823815, "calibration/confidence_entropy": 0.5058137954460664, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0027397260273972603, "calibration/coverage@20%": 0.03790973581213307, "calibration/coverage@25%": 0.04533390410958904, "calibration/coverage@30%": 0.0867653803816047, "calibration/coverage@5%": 0.0, "calibration/ece": 0.19659801829272178, "calibration/mean_confidence": 0.4781637847637869, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 531.0, "completions/max_terminated_length": 531.0, "completions/mean_length": 111.984375, "completions/mean_terminated_length": 112.01715240478515, "completions/min_length": 19.6, "completions/min_terminated_length": 48.6, "epoch": 0.176, "grad_norm": 0.016006283462047577, "learning_rate": 1e-06, "loss": 0.0029, "num_tokens": 179783382.0, "reward": 0.8927610993385315, "reward_std": 0.11393142342567444, "rewards/accuracy_reward": 0.46640625, "rewards/brier_reward": 0.7212023615837098, "rewards/confidence_uniqueness_reward": 0.9581124544143677, "rewards/format_reward": 0.999609375, "rewards/frontier_coverage_0": 0.07742121592164039, "rewards/frontier_coverage_1": 0.07742121592164039, "rewards/frontier_coverage_10": 0.07742121592164039, "rewards/frontier_coverage_15": 0.07742121592164039, "rewards/frontier_coverage_20": 0.07742121592164039, "rewards/frontier_coverage_25": 0.07742121592164039, "rewards/frontier_coverage_5": 0.07742121592164039, "rewards/frontier_entropy_batch_reward": -0.1592807114124298, "signal/accuracy_reward/centered_abs_mean": 0.141259765625, "signal/accuracy_reward/group_std_mean": 0.17928344309329985, "signal/accuracy_reward/group_zero_std_frac": 0.5125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8460919618606567, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0706298828125, "signal/advantage_abs_mean": 0.7873261332511902, "signal/advantage_pre_scale_abs_mean": 0.09122534543275833, "signal/advantage_pre_scale_std": 0.12952570170164107, "signal/advantage_std": 0.9832993149757385, "signal/brier_reward/centered_abs_mean": 0.20927453339099883, "signal/brier_reward/group_std_mean": 0.2582443118095398, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2531407684087753, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02092745341360569, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013171698711812497, "signal/confidence_uniqueness_reward/group_std_mean": 0.01759280003607273, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.015911542251706122, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013171698665246367, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004652977641671896, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_coverage_0/centered_abs_mean": 0.25559466183185575, "signal/frontier_coverage_0/group_std_mean": 0.3214977204799652, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04414609596133232, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003655003709718585, "signal/frontier_coverage_1/centered_abs_mean": 0.25559466183185575, "signal/frontier_coverage_1/group_std_mean": 0.3214977204799652, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04414609596133232, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003655003709718585, "signal/frontier_coverage_10/centered_abs_mean": 0.25559466183185575, "signal/frontier_coverage_10/group_std_mean": 0.3214977204799652, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04414609596133232, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003655003709718585, "signal/frontier_coverage_15/centered_abs_mean": 0.25559466183185575, "signal/frontier_coverage_15/group_std_mean": 0.3214977204799652, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04414609596133232, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003655003709718585, "signal/frontier_coverage_20/centered_abs_mean": 0.25559466183185575, "signal/frontier_coverage_20/group_std_mean": 0.3214977204799652, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04414609596133232, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003655003709718585, "signal/frontier_coverage_25/centered_abs_mean": 0.25559466183185575, "signal/frontier_coverage_25/group_std_mean": 0.3214977204799652, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04414609596133232, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003655003709718585, "signal/frontier_coverage_5/centered_abs_mean": 0.25559466183185575, "signal/frontier_coverage_5/group_std_mean": 0.3214977204799652, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04414609596133232, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003655003709718585, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2443944036960602, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3236381232738495, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.29559103548526766, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.024439441785216332, "step": 55 }, { "calibration/aurc": 0.3614467503210282, "calibration/batch_distribution_entropy": 0.9762620070934765, "calibration/buffer_distribution_entropy": 0.982707586617636, "calibration/confidence_entropy": 0.47250542952602925, "calibration/coverage@0%": 0.0023483365949119373, "calibration/coverage@1%": 0.0023483365949119373, "calibration/coverage@10%": 0.014081610812133072, "calibration/coverage@15%": 0.026194807974559687, "calibration/coverage@20%": 0.059058524951076316, "calibration/coverage@25%": 0.1892421416340509, "calibration/coverage@30%": 0.30892398483365946, "calibration/coverage@5%": 0.0023483365949119373, "calibration/ece": 0.15341142419107695, "calibration/mean_confidence": 0.43818721331033733, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 459.4, "completions/max_terminated_length": 459.4, "completions/mean_length": 112.08486328125, "completions/mean_terminated_length": 112.14996032714843, "completions/min_length": 10.0, "completions/min_terminated_length": 48.6, "epoch": 0.192, "grad_norm": 0.02954723685979843, "learning_rate": 1e-06, "loss": -0.0022, "num_tokens": 195745947.0, "reward": 0.9055456638336181, "reward_std": 0.11155757009983062, "rewards/accuracy_reward": 0.497265625, "rewards/brier_reward": 0.7332705736160279, "rewards/confidence_uniqueness_reward": 0.9544866442680359, "rewards/format_reward": 0.9994140625, "rewards/frontier_coverage_0": 0.08700279919430613, "rewards/frontier_coverage_1": 0.08700279919430613, "rewards/frontier_coverage_10": 0.08700279919430613, "rewards/frontier_coverage_15": 0.08700279919430613, "rewards/frontier_coverage_20": 0.08700279919430613, "rewards/frontier_coverage_25": 0.08700279919430613, "rewards/frontier_coverage_5": 0.08700279919430613, "rewards/frontier_entropy_batch_reward": -0.20278894305229186, "signal/accuracy_reward/centered_abs_mean": 0.139501953125, "signal/accuracy_reward/group_std_mean": 0.18089237213134765, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9052997827529907, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0697509765625, "signal/advantage_abs_mean": 0.775652015209198, "signal/advantage_pre_scale_abs_mean": 0.08811791241168976, "signal/advantage_pre_scale_std": 0.12764054387807847, "signal/advantage_std": 0.983202064037323, "signal/brier_reward/centered_abs_mean": 0.20351653397083283, "signal/brier_reward/group_std_mean": 0.25207469165325164, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.26565858721733093, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020351653546094896, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016574647277593613, "signal/confidence_uniqueness_reward/group_std_mean": 0.022703318297863005, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02189209684729576, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00165746477432549, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007327704038470983, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_coverage_0/centered_abs_mean": 0.2656306028366089, "signal/frontier_coverage_0/group_std_mean": 0.33098281025886533, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04965348467230797, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003798517771065235, "signal/frontier_coverage_1/centered_abs_mean": 0.2656306028366089, "signal/frontier_coverage_1/group_std_mean": 0.33098281025886533, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04965348467230797, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003798517771065235, "signal/frontier_coverage_10/centered_abs_mean": 0.2656306028366089, "signal/frontier_coverage_10/group_std_mean": 0.33098281025886533, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04965348467230797, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003798517771065235, "signal/frontier_coverage_15/centered_abs_mean": 0.2656306028366089, "signal/frontier_coverage_15/group_std_mean": 0.33098281025886533, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04965348467230797, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003798517771065235, "signal/frontier_coverage_20/centered_abs_mean": 0.2656306028366089, "signal/frontier_coverage_20/group_std_mean": 0.33098281025886533, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04965348467230797, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003798517771065235, "signal/frontier_coverage_25/centered_abs_mean": 0.2656306028366089, "signal/frontier_coverage_25/group_std_mean": 0.33098281025886533, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04965348467230797, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003798517771065235, "signal/frontier_coverage_5/centered_abs_mean": 0.2656306028366089, "signal/frontier_coverage_5/group_std_mean": 0.33098281025886533, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04965348467230797, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003798517771065235, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28159146904945376, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3595281183719635, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3709623396396637, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02815914712846279, "step": 60 }, { "calibration/aurc": 0.30011243720009795, "calibration/batch_distribution_entropy": 0.9771462317737031, "calibration/buffer_distribution_entropy": 0.9847172441681387, "calibration/confidence_entropy": 0.5078898292737076, "calibration/coverage@0%": 0.02265701443248532, "calibration/coverage@1%": 0.02265701443248532, "calibration/coverage@10%": 0.1675788894324853, "calibration/coverage@15%": 0.301340050146771, "calibration/coverage@20%": 0.3647283206947162, "calibration/coverage@25%": 0.428515625, "calibration/coverage@30%": 0.47742554427592954, "calibration/coverage@5%": 0.08320388943248533, "calibration/ece": 0.17360716271690285, "calibration/mean_confidence": 0.5349996273034741, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 443.2, "completions/max_terminated_length": 443.2, "completions/mean_length": 114.509765625, "completions/mean_terminated_length": 114.54372100830078, "completions/min_length": 20.6, "completions/min_terminated_length": 52.0, "epoch": 0.208, "grad_norm": 0.03158510476350784, "learning_rate": 1e-06, "loss": -0.0023, "num_tokens": 211950751.0, "reward": 0.9300533056259155, "reward_std": 0.10566971302032471, "rewards/accuracy_reward": 0.54306640625, "rewards/brier_reward": 0.7536995768547058, "rewards/confidence_uniqueness_reward": 0.9566167235374451, "rewards/format_reward": 0.999609375, "rewards/frontier_coverage_0": 0.0559085650369525, "rewards/frontier_coverage_1": 0.0559085650369525, "rewards/frontier_coverage_10": 0.0559085650369525, "rewards/frontier_coverage_15": 0.0559085650369525, "rewards/frontier_coverage_20": 0.0559085650369525, "rewards/frontier_coverage_25": 0.0559085650369525, "rewards/frontier_coverage_5": 0.0559085650369525, "rewards/frontier_entropy_batch_reward": -0.1791268080472946, "signal/accuracy_reward/centered_abs_mean": 0.123236083984375, "signal/accuracy_reward/group_std_mean": 0.16450151801109314, "signal/accuracy_reward/group_zero_std_frac": 0.521875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8466056942939758, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0616180419921875, "signal/advantage_abs_mean": 0.7682868838310242, "signal/advantage_pre_scale_abs_mean": 0.08178776204586029, "signal/advantage_pre_scale_std": 0.1228803813457489, "signal/advantage_std": 0.9831345796585083, "signal/brier_reward/centered_abs_mean": 0.1688483715057373, "signal/brier_reward/group_std_mean": 0.2128828853368759, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23237936198711395, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016884836927056314, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011919040419161319, "signal/confidence_uniqueness_reward/group_std_mean": 0.016283450275659563, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.016335343569517137, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011919040698558092, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005191143415868282, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_coverage_0/centered_abs_mean": 0.19982794523239136, "signal/frontier_coverage_0/group_std_mean": 0.2555452287197113, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03945437371730805, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028575395699590445, "signal/frontier_coverage_1/centered_abs_mean": 0.19982794523239136, "signal/frontier_coverage_1/group_std_mean": 0.2555452287197113, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03945437371730805, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028575395699590445, "signal/frontier_coverage_10/centered_abs_mean": 0.19982794523239136, "signal/frontier_coverage_10/group_std_mean": 0.2555452287197113, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03945437371730805, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0028575395699590445, "signal/frontier_coverage_15/centered_abs_mean": 0.19982794523239136, "signal/frontier_coverage_15/group_std_mean": 0.2555452287197113, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03945437371730805, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028575395699590445, "signal/frontier_coverage_20/centered_abs_mean": 0.19982794523239136, "signal/frontier_coverage_20/group_std_mean": 0.2555452287197113, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03945437371730805, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028575395699590445, "signal/frontier_coverage_25/centered_abs_mean": 0.19982794523239136, "signal/frontier_coverage_25/group_std_mean": 0.2555452287197113, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03945437371730805, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028575395699590445, "signal/frontier_coverage_5/centered_abs_mean": 0.19982794523239136, "signal/frontier_coverage_5/group_std_mean": 0.2555452287197113, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03945437371730805, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028575395699590445, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25831425189971924, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33621604442596437, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3541386485099792, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025831427052617074, "step": 65 }, { "calibration/aurc": 0.2777412552876487, "calibration/batch_distribution_entropy": 0.9853763418017791, "calibration/buffer_distribution_entropy": 0.9872255378108227, "calibration/confidence_entropy": 0.49916279117895535, "calibration/coverage@0%": 0.036328125, "calibration/coverage@1%": 0.036328125, "calibration/coverage@10%": 0.207421875, "calibration/coverage@15%": 0.3203125, "calibration/coverage@20%": 0.44375, "calibration/coverage@25%": 0.530859375, "calibration/coverage@30%": 0.6, "calibration/coverage@5%": 0.108984375, "calibration/ece": 0.15090117081731733, "calibration/mean_confidence": 0.45539058067229243, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 460.0, "completions/max_terminated_length": 460.0, "completions/mean_length": 116.38125, "completions/mean_terminated_length": 116.39248504638672, "completions/min_length": 47.6, "completions/min_terminated_length": 59.2, "epoch": 0.224, "grad_norm": 0.019556866958737373, "learning_rate": 1e-06, "loss": 0.0031, "num_tokens": 228295679.0, "reward": 0.91881822347641, "reward_std": 0.08917870223522187, "rewards/accuracy_reward": 0.502734375, "rewards/brier_reward": 0.784402334690094, "rewards/confidence_uniqueness_reward": 0.9537890195846558, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.12251347005367279, "rewards/frontier_coverage_1": 0.12251347005367279, "rewards/frontier_coverage_10": 0.12251347005367279, "rewards/frontier_coverage_15": 0.12251347005367279, "rewards/frontier_coverage_20": 0.12251347005367279, "rewards/frontier_coverage_25": 0.12251347005367279, "rewards/frontier_coverage_5": 0.12251347005367279, "rewards/frontier_entropy_batch_reward": -0.18582858741283417, "signal/accuracy_reward/centered_abs_mean": 0.10262451171875, "signal/accuracy_reward/group_std_mean": 0.13685290068387984, "signal/accuracy_reward/group_zero_std_frac": 0.603125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8474323511123657, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.051312255859375, "signal/advantage_abs_mean": 0.7694117426872253, "signal/advantage_pre_scale_abs_mean": 0.06965965777635574, "signal/advantage_pre_scale_std": 0.10694814324378968, "signal/advantage_std": 0.9828300595283508, "signal/brier_reward/centered_abs_mean": 0.14114340543746948, "signal/brier_reward/group_std_mean": 0.18228788077831268, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23655705153942108, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014114340580999852, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012035387381911278, "signal/confidence_uniqueness_reward/group_std_mean": 0.015354960411787032, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020498888939619063, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012035387801006437, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018597409129142762, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.19413280189037324, "signal/frontier_coverage_0/group_std_mean": 0.2504078775644302, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.046854938566684726, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027760989032685757, "signal/frontier_coverage_1/centered_abs_mean": 0.19413280189037324, "signal/frontier_coverage_1/group_std_mean": 0.2504078775644302, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.046854938566684726, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027760989032685757, "signal/frontier_coverage_10/centered_abs_mean": 0.19413280189037324, "signal/frontier_coverage_10/group_std_mean": 0.2504078775644302, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.046854938566684726, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027760989032685757, "signal/frontier_coverage_15/centered_abs_mean": 0.19413280189037324, "signal/frontier_coverage_15/group_std_mean": 0.2504078775644302, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.046854938566684726, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027760989032685757, "signal/frontier_coverage_20/centered_abs_mean": 0.19413280189037324, "signal/frontier_coverage_20/group_std_mean": 0.2504078775644302, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.046854938566684726, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027760989032685757, "signal/frontier_coverage_25/centered_abs_mean": 0.19413280189037324, "signal/frontier_coverage_25/group_std_mean": 0.2504078775644302, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.046854938566684726, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027760989032685757, "signal/frontier_coverage_5/centered_abs_mean": 0.19413280189037324, "signal/frontier_coverage_5/group_std_mean": 0.2504078775644302, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.046854938566684726, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027760989032685757, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2574520826339722, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33517150282859803, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.43392097353935244, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02574520818889141, "step": 70 }, { "calibration/aurc": 0.3451851171793069, "calibration/batch_distribution_entropy": 0.9558716587565168, "calibration/buffer_distribution_entropy": 0.9895516259326991, "calibration/confidence_entropy": 0.48006278103461353, "calibration/coverage@0%": 0.015625, "calibration/coverage@1%": 0.015625, "calibration/coverage@10%": 0.15859375, "calibration/coverage@15%": 0.22265625, "calibration/coverage@20%": 0.250390625, "calibration/coverage@25%": 0.266015625, "calibration/coverage@30%": 0.351171875, "calibration/coverage@5%": 0.0578125, "calibration/ece": 0.16036406672020234, "calibration/mean_confidence": 0.5099695563505072, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 394.2, "completions/max_terminated_length": 394.2, "completions/mean_length": 119.859765625, "completions/mean_terminated_length": 119.87151336669922, "completions/min_length": 49.6, "completions/min_terminated_length": 62.6, "epoch": 0.24, "grad_norm": 0.014534401707351208, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 244774723.0, "reward": 0.9338017344474793, "reward_std": 0.09010614305734635, "rewards/accuracy_reward": 0.5482421875, "rewards/brier_reward": 0.7745696544647217, "rewards/confidence_uniqueness_reward": 0.9516302347183228, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.0856390755623579, "rewards/frontier_coverage_1": 0.0856390755623579, "rewards/frontier_coverage_10": 0.0856390755623579, "rewards/frontier_coverage_15": 0.0856390755623579, "rewards/frontier_coverage_20": 0.0856390755623579, "rewards/frontier_coverage_25": 0.0856390755623579, "rewards/frontier_coverage_5": 0.0856390755623579, "rewards/frontier_entropy_batch_reward": -0.21463005542755126, "signal/accuracy_reward/centered_abs_mean": 0.1029052734375, "signal/accuracy_reward/group_std_mean": 0.13757123202085494, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8875034332275391, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05145263671875, "signal/advantage_abs_mean": 0.7700738310813904, "signal/advantage_pre_scale_abs_mean": 0.06997058242559433, "signal/advantage_pre_scale_std": 0.10958524942398071, "signal/advantage_std": 0.9827866315841675, "signal/brier_reward/centered_abs_mean": 0.13409124910831452, "signal/brier_reward/group_std_mean": 0.17349932193756104, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.23266932964324952, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013409125059843064, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012924287095665931, "signal/confidence_uniqueness_reward/group_std_mean": 0.016424901597201825, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02235339842736721, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012924287468194962, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001489312667399645, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.16890641450881957, "signal/frontier_coverage_0/group_std_mean": 0.22147968411445618, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04203609824180603, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002415361627936363, "signal/frontier_coverage_1/centered_abs_mean": 0.16890641450881957, "signal/frontier_coverage_1/group_std_mean": 0.22147968411445618, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04203609824180603, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002415361627936363, "signal/frontier_coverage_10/centered_abs_mean": 0.16890641450881957, "signal/frontier_coverage_10/group_std_mean": 0.22147968411445618, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04203609824180603, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002415361627936363, "signal/frontier_coverage_15/centered_abs_mean": 0.16890641450881957, "signal/frontier_coverage_15/group_std_mean": 0.22147968411445618, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04203609824180603, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002415361627936363, "signal/frontier_coverage_20/centered_abs_mean": 0.16890641450881957, "signal/frontier_coverage_20/group_std_mean": 0.22147968411445618, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04203609824180603, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002415361627936363, "signal/frontier_coverage_25/centered_abs_mean": 0.16890641450881957, "signal/frontier_coverage_25/group_std_mean": 0.22147968411445618, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04203609824180603, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002415361627936363, "signal/frontier_coverage_5/centered_abs_mean": 0.16890641450881957, "signal/frontier_coverage_5/group_std_mean": 0.22147968411445618, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04203609824180603, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002415361627936363, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2731468856334686, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3471518874168396, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47121843695640564, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02731468863785267, "step": 75 }, { "calibration/aurc": 0.2380899878689502, "calibration/batch_distribution_entropy": 0.9543793623709981, "calibration/buffer_distribution_entropy": 0.9904958080647177, "calibration/confidence_entropy": 0.4724079818447585, "calibration/coverage@0%": 0.038671875, "calibration/coverage@1%": 0.038671875, "calibration/coverage@10%": 0.319921875, "calibration/coverage@15%": 0.382421875, "calibration/coverage@20%": 0.448828125, "calibration/coverage@25%": 0.51875, "calibration/coverage@30%": 0.60625, "calibration/coverage@5%": 0.196875, "calibration/ece": 0.14697595971741417, "calibration/mean_confidence": 0.5045763309062191, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 375.0, "completions/max_terminated_length": 375.0, "completions/mean_length": 130.82294921875, "completions/mean_terminated_length": 130.8369934082031, "completions/min_length": 51.0, "completions/min_terminated_length": 64.2, "epoch": 0.256, "grad_norm": 0.011844088323414326, "learning_rate": 1e-06, "loss": -0.0062, "num_tokens": 261169166.0, "reward": 0.9297836661338806, "reward_std": 0.08369718790054322, "rewards/accuracy_reward": 0.54404296875, "rewards/brier_reward": 0.7865662574768066, "rewards/confidence_uniqueness_reward": 0.9491964101791381, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.10523570999503136, "rewards/frontier_coverage_1": 0.10523570999503136, "rewards/frontier_coverage_10": 0.10523570999503136, "rewards/frontier_coverage_15": 0.10523570999503136, "rewards/frontier_coverage_20": 0.10523570999503136, "rewards/frontier_coverage_25": 0.10523570999503136, "rewards/frontier_coverage_5": 0.10523570999503136, "rewards/frontier_entropy_batch_reward": -0.2629933536052704, "signal/accuracy_reward/centered_abs_mean": 0.088494873046875, "signal/accuracy_reward/group_std_mean": 0.12247141897678375, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7810031771659851, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0442474365234375, "signal/advantage_abs_mean": 0.7776289224624634, "signal/advantage_pre_scale_abs_mean": 0.0648388609290123, "signal/advantage_pre_scale_std": 0.1018882930278778, "signal/advantage_std": 0.9827472686767578, "signal/brier_reward/centered_abs_mean": 0.11603698432445526, "signal/brier_reward/group_std_mean": 0.1506718337535858, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.204732221364975, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011603698506951332, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014407002925872802, "signal/confidence_uniqueness_reward/group_std_mean": 0.018257852271199228, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025521285086870193, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001440700376406312, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0017066342756152154, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14679024815559388, "signal/frontier_coverage_0/group_std_mean": 0.1926429718732834, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03715235441923141, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020991005701944234, "signal/frontier_coverage_1/centered_abs_mean": 0.14679024815559388, "signal/frontier_coverage_1/group_std_mean": 0.1926429718732834, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03715235441923141, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020991005701944234, "signal/frontier_coverage_10/centered_abs_mean": 0.14679024815559388, "signal/frontier_coverage_10/group_std_mean": 0.1926429718732834, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03715235441923141, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020991005701944234, "signal/frontier_coverage_15/centered_abs_mean": 0.14679024815559388, "signal/frontier_coverage_15/group_std_mean": 0.1926429718732834, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03715235441923141, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020991005701944234, "signal/frontier_coverage_20/centered_abs_mean": 0.14679024815559388, "signal/frontier_coverage_20/group_std_mean": 0.1926429718732834, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03715235441923141, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020991005701944234, "signal/frontier_coverage_25/centered_abs_mean": 0.14679024815559388, "signal/frontier_coverage_25/group_std_mean": 0.1926429718732834, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03715235441923141, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020991005701944234, "signal/frontier_coverage_5/centered_abs_mean": 0.14679024815559388, "signal/frontier_coverage_5/group_std_mean": 0.1926429718732834, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03715235441923141, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020991005701944234, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3055807054042816, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3773996353149414, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5410493850708008, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030558070167899132, "step": 80 }, { "calibration/aurc": 0.3613767694028809, "calibration/batch_distribution_entropy": 0.9694824840031633, "calibration/buffer_distribution_entropy": 0.9915738867672383, "calibration/confidence_entropy": 0.5049346154650323, "calibration/coverage@0%": 0.030859375, "calibration/coverage@1%": 0.030859375, "calibration/coverage@10%": 0.1234375, "calibration/coverage@15%": 0.1546875, "calibration/coverage@20%": 0.27734375, "calibration/coverage@25%": 0.353125, "calibration/coverage@30%": 0.4, "calibration/coverage@5%": 0.061328125, "calibration/ece": 0.1336495954580236, "calibration/mean_confidence": 0.46672275323620094, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 450.0, "completions/max_terminated_length": 450.0, "completions/mean_length": 153.59951171875, "completions/mean_terminated_length": 153.61471862792968, "completions/min_length": 60.8, "completions/min_terminated_length": 77.8, "epoch": 0.272, "grad_norm": 0.011426495388150215, "learning_rate": 1e-06, "loss": -0.0031, "num_tokens": 277707721.0, "reward": 0.9228897452354431, "reward_std": 0.08293161988258362, "rewards/accuracy_reward": 0.51865234375, "rewards/brier_reward": 0.788739800453186, "rewards/confidence_uniqueness_reward": 0.9513486862182617, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.12113445177674294, "rewards/frontier_coverage_1": 0.12113445177674294, "rewards/frontier_coverage_10": 0.12113445177674294, "rewards/frontier_coverage_15": 0.12113445177674294, "rewards/frontier_coverage_20": 0.12113445177674294, "rewards/frontier_coverage_25": 0.12113445177674294, "rewards/frontier_coverage_5": 0.12113445177674294, "rewards/frontier_entropy_batch_reward": -0.22522012591362, "signal/accuracy_reward/centered_abs_mean": 0.101690673828125, "signal/accuracy_reward/group_std_mean": 0.13314552009105682, "signal/accuracy_reward/group_zero_std_frac": 0.628125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9522302627563477, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0508453369140625, "signal/advantage_abs_mean": 0.7683973193168641, "signal/advantage_pre_scale_abs_mean": 0.06439381539821624, "signal/advantage_pre_scale_std": 0.10144704878330231, "signal/advantage_std": 0.9826340436935425, "signal/brier_reward/centered_abs_mean": 0.11361265182495117, "signal/brier_reward/group_std_mean": 0.1463920384645462, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2135068655014038, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011361265368759633, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012319310754537582, "signal/confidence_uniqueness_reward/group_std_mean": 0.015735189616680145, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023272840678691863, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012319311266765, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018666807562112808, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.17033348679542543, "signal/frontier_coverage_0/group_std_mean": 0.21856584250926972, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04595231339335441, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002435768861323595, "signal/frontier_coverage_1/centered_abs_mean": 0.17033348679542543, "signal/frontier_coverage_1/group_std_mean": 0.21856584250926972, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04595231339335441, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002435768861323595, "signal/frontier_coverage_10/centered_abs_mean": 0.17033348679542543, "signal/frontier_coverage_10/group_std_mean": 0.21856584250926972, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04595231339335441, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002435768861323595, "signal/frontier_coverage_15/centered_abs_mean": 0.17033348679542543, "signal/frontier_coverage_15/group_std_mean": 0.21856584250926972, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04595231339335441, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002435768861323595, "signal/frontier_coverage_20/centered_abs_mean": 0.17033348679542543, "signal/frontier_coverage_20/group_std_mean": 0.21856584250926972, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04595231339335441, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002435768861323595, "signal/frontier_coverage_25/centered_abs_mean": 0.17033348679542543, "signal/frontier_coverage_25/group_std_mean": 0.21856584250926972, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04595231339335441, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002435768861323595, "signal/frontier_coverage_5/centered_abs_mean": 0.17033348679542543, "signal/frontier_coverage_5/group_std_mean": 0.21856584250926972, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04595231339335441, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002435768861323595, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2705967366695404, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34300823211669923, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.510302847623825, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027059673517942428, "step": 85 }, { "calibration/aurc": 0.27859286701221453, "calibration/batch_distribution_entropy": 0.970638670654175, "calibration/buffer_distribution_entropy": 0.9927677427137654, "calibration/confidence_entropy": 0.478569513861055, "calibration/coverage@0%": 0.022666187622309198, "calibration/coverage@1%": 0.022666187622309198, "calibration/coverage@10%": 0.15825892857142856, "calibration/coverage@15%": 0.22705173679060664, "calibration/coverage@20%": 0.27435099681996084, "calibration/coverage@25%": 0.3427592954990215, "calibration/coverage@30%": 0.4955708781800391, "calibration/coverage@5%": 0.0640724376223092, "calibration/ece": 0.12627269961365964, "calibration/mean_confidence": 0.5429138799075158, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 443.8, "completions/max_terminated_length": 443.8, "completions/mean_length": 161.62763671875, "completions/mean_terminated_length": 161.62763671875, "completions/min_length": 79.2, "completions/min_terminated_length": 79.2, "epoch": 0.288, "grad_norm": 0.011343343183398247, "learning_rate": 1e-06, "loss": -0.0037, "num_tokens": 294320964.0, "reward": 0.9370681405067444, "reward_std": 0.0841323509812355, "rewards/accuracy_reward": 0.55107421875, "rewards/brier_reward": 0.790608286857605, "rewards/confidence_uniqueness_reward": 0.9515504360198974, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.10305131077766419, "rewards/frontier_coverage_1": 0.10305131077766419, "rewards/frontier_coverage_10": 0.10305131077766419, "rewards/frontier_coverage_15": 0.10305131077766419, "rewards/frontier_coverage_20": 0.10305131077766419, "rewards/frontier_coverage_25": 0.10305131077766419, "rewards/frontier_coverage_5": 0.10305131077766419, "rewards/frontier_entropy_batch_reward": -0.22902617156505584, "signal/accuracy_reward/centered_abs_mean": 0.098126220703125, "signal/accuracy_reward/group_std_mean": 0.13257125914096832, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8775075793266296, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0490631103515625, "signal/advantage_abs_mean": 0.7683995246887207, "signal/advantage_pre_scale_abs_mean": 0.06486314833164215, "signal/advantage_pre_scale_std": 0.10212174206972122, "signal/advantage_std": 0.9827188014984131, "signal/brier_reward/centered_abs_mean": 0.11409407407045365, "signal/brier_reward/group_std_mean": 0.14835602343082427, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20434542298316954, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011409407667815685, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012881199643015862, "signal/confidence_uniqueness_reward/group_std_mean": 0.016685626842081545, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02331661656498909, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001288120006211102, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003475642204284668, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.15965526700019836, "signal/frontier_coverage_0/group_std_mean": 0.20462646484375, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04117161184549332, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022830702364444733, "signal/frontier_coverage_1/centered_abs_mean": 0.15965526700019836, "signal/frontier_coverage_1/group_std_mean": 0.20462646484375, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04117161184549332, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022830702364444733, "signal/frontier_coverage_10/centered_abs_mean": 0.15965526700019836, "signal/frontier_coverage_10/group_std_mean": 0.20462646484375, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04117161184549332, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022830702364444733, "signal/frontier_coverage_15/centered_abs_mean": 0.15965526700019836, "signal/frontier_coverage_15/group_std_mean": 0.20462646484375, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04117161184549332, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0022830702364444733, "signal/frontier_coverage_20/centered_abs_mean": 0.15965526700019836, "signal/frontier_coverage_20/group_std_mean": 0.20462646484375, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04117161184549332, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0022830702364444733, "signal/frontier_coverage_25/centered_abs_mean": 0.15965526700019836, "signal/frontier_coverage_25/group_std_mean": 0.20462646484375, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04117161184549332, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022830702364444733, "signal/frontier_coverage_5/centered_abs_mean": 0.15965526700019836, "signal/frontier_coverage_5/group_std_mean": 0.20462646484375, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04117161184549332, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022830702364444733, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28002009987831117, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3523731052875519, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5039195537567138, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02800200991332531, "step": 90 }, { "calibration/aurc": 0.27400511315915477, "calibration/batch_distribution_entropy": 0.9699587413307474, "calibration/buffer_distribution_entropy": 0.9931079326030978, "calibration/confidence_entropy": 0.4935722923864724, "calibration/coverage@0%": 0.023046875, "calibration/coverage@1%": 0.023046875, "calibration/coverage@10%": 0.130078125, "calibration/coverage@15%": 0.20546875, "calibration/coverage@20%": 0.308984375, "calibration/coverage@25%": 0.5109375, "calibration/coverage@30%": 0.6390625, "calibration/coverage@5%": 0.052734375, "calibration/ece": 0.08987905700365367, "calibration/mean_confidence": 0.5164384378723209, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 547.6, "completions/max_terminated_length": 547.6, "completions/mean_length": 181.79541015625, "completions/mean_terminated_length": 181.84756164550782, "completions/min_length": 38.0, "completions/min_terminated_length": 92.0, "epoch": 0.304, "grad_norm": 0.009509476833045483, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 311112501.0, "reward": 0.926408588886261, "reward_std": 0.08505538254976272, "rewards/accuracy_reward": 0.524609375, "rewards/brier_reward": 0.7812718510627746, "rewards/confidence_uniqueness_reward": 0.9513449430465698, "rewards/format_reward": 0.999609375, "rewards/frontier_coverage_0": 0.11590675860643387, "rewards/frontier_coverage_1": 0.11590675860643387, "rewards/frontier_coverage_10": 0.11590675860643387, "rewards/frontier_coverage_15": 0.11590675860643387, "rewards/frontier_coverage_20": 0.11590675860643387, "rewards/frontier_coverage_25": 0.11590675860643387, "rewards/frontier_coverage_5": 0.11590675860643387, "rewards/frontier_entropy_batch_reward": -0.20564735531806946, "signal/accuracy_reward/centered_abs_mean": 0.10882568359375, "signal/accuracy_reward/group_std_mean": 0.14444092959165572, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.974223279953003, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.054412841796875, "signal/advantage_abs_mean": 0.7569803476333619, "signal/advantage_pre_scale_abs_mean": 0.06511303558945655, "signal/advantage_pre_scale_std": 0.10234367698431016, "signal/advantage_std": 0.9826973438262939, "signal/brier_reward/centered_abs_mean": 0.11599338501691818, "signal/brier_reward/group_std_mean": 0.14832000136375428, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20899596214294433, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011599338613450527, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01347355991601944, "signal/confidence_uniqueness_reward/group_std_mean": 0.018178258277475833, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024797194078564642, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013473560102283955, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007120777480304241, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_coverage_0/centered_abs_mean": 0.1757221668958664, "signal/frontier_coverage_0/group_std_mean": 0.22423993349075316, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04559025391936302, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002512827096506953, "signal/frontier_coverage_1/centered_abs_mean": 0.1757221668958664, "signal/frontier_coverage_1/group_std_mean": 0.22423993349075316, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04559025391936302, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002512827096506953, "signal/frontier_coverage_10/centered_abs_mean": 0.1757221668958664, "signal/frontier_coverage_10/group_std_mean": 0.22423993349075316, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04559025391936302, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002512827096506953, "signal/frontier_coverage_15/centered_abs_mean": 0.1757221668958664, "signal/frontier_coverage_15/group_std_mean": 0.22423993349075316, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04559025391936302, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002512827096506953, "signal/frontier_coverage_20/centered_abs_mean": 0.1757221668958664, "signal/frontier_coverage_20/group_std_mean": 0.22423993349075316, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04559025391936302, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002512827096506953, "signal/frontier_coverage_25/centered_abs_mean": 0.1757221668958664, "signal/frontier_coverage_25/group_std_mean": 0.22423993349075316, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04559025391936302, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002512827096506953, "signal/frontier_coverage_5/centered_abs_mean": 0.1757221668958664, "signal/frontier_coverage_5/group_std_mean": 0.22423993349075316, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04559025391936302, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002512827096506953, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26609655022621154, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3424116730690002, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4856810808181763, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02660965621471405, "step": 95 }, { "calibration/aurc": 0.20195095907390134, "calibration/batch_distribution_entropy": 0.9734814252165089, "calibration/buffer_distribution_entropy": 0.9936861512598052, "calibration/confidence_entropy": 0.4821587175226608, "calibration/coverage@0%": 0.1, "calibration/coverage@1%": 0.130859375, "calibration/coverage@10%": 0.361328125, "calibration/coverage@15%": 0.449609375, "calibration/coverage@20%": 0.55546875, "calibration/coverage@25%": 0.678515625, "calibration/coverage@30%": 0.750390625, "calibration/coverage@5%": 0.287109375, "calibration/ece": 0.145156608944858, "calibration/mean_confidence": 0.5455970664283126, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 594.6, "completions/max_terminated_length": 594.6, "completions/mean_length": 192.10126953125, "completions/mean_terminated_length": 192.21360473632814, "completions/min_length": 39.0, "completions/min_terminated_length": 82.6, "epoch": 0.32, "grad_norm": 0.01124265231192112, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 328168322.0, "reward": 0.938688862323761, "reward_std": 0.07619319260120391, "rewards/accuracy_reward": 0.54619140625, "rewards/brier_reward": 0.8016320586204528, "rewards/confidence_uniqueness_reward": 0.951561689376831, "rewards/format_reward": 0.99931640625, "rewards/frontier_coverage_0": 0.11333505995571613, "rewards/frontier_coverage_1": 0.11333505995571613, "rewards/frontier_coverage_10": 0.11333505995571613, "rewards/frontier_coverage_15": 0.11333505995571613, "rewards/frontier_coverage_20": 0.11333505995571613, "rewards/frontier_coverage_25": 0.11333505995571613, "rewards/frontier_coverage_5": 0.11333505995571613, "rewards/frontier_entropy_batch_reward": -0.20729252099990844, "signal/accuracy_reward/centered_abs_mean": 0.083489990234375, "signal/accuracy_reward/group_std_mean": 0.11030419915914536, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8474804639816285, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0417449951171875, "signal/advantage_abs_mean": 0.7693067193031311, "signal/advantage_pre_scale_abs_mean": 0.058793623745441434, "signal/advantage_pre_scale_std": 0.09535037130117416, "signal/advantage_std": 0.982493007183075, "signal/brier_reward/centered_abs_mean": 0.09994795173406601, "signal/brier_reward/group_std_mean": 0.13043854236602784, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20334738492965698, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0099947951734066, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013709683902561665, "signal/confidence_uniqueness_reward/group_std_mean": 0.018594534881412984, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027801194787025453, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013709683902561665, "signal/format_reward/centered_abs_mean": 0.001300048828125, "signal/format_reward/group_std_mean": 0.003194373194128275, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013123654946684837, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625, "signal/frontier_coverage_0/centered_abs_mean": 0.1436137169599533, "signal/frontier_coverage_0/group_std_mean": 0.18481407761573793, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041797750443220136, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002053676126524806, "signal/frontier_coverage_1/centered_abs_mean": 0.1436137169599533, "signal/frontier_coverage_1/group_std_mean": 0.18481407761573793, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041797750443220136, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002053676126524806, "signal/frontier_coverage_10/centered_abs_mean": 0.1436137169599533, "signal/frontier_coverage_10/group_std_mean": 0.18481407761573793, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.041797750443220136, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002053676126524806, "signal/frontier_coverage_15/centered_abs_mean": 0.1436137169599533, "signal/frontier_coverage_15/group_std_mean": 0.18481407761573793, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.041797750443220136, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002053676126524806, "signal/frontier_coverage_20/centered_abs_mean": 0.1436137169599533, "signal/frontier_coverage_20/group_std_mean": 0.18481407761573793, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.041797750443220136, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002053676126524806, "signal/frontier_coverage_25/centered_abs_mean": 0.1436137169599533, "signal/frontier_coverage_25/group_std_mean": 0.18481407761573793, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.041797750443220136, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002053676126524806, "signal/frontier_coverage_5/centered_abs_mean": 0.1436137169599533, "signal/frontier_coverage_5/group_std_mean": 0.18481407761573793, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041797750443220136, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002053676126524806, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2712838649749756, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3431327760219574, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5484427690505982, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02712838724255562, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.4838447164186258, "eval_calibration/batch_distribution_entropy": 0.9087856722466727, "eval_calibration/buffer_distribution_entropy": 0.9937493854082251, "eval_calibration/confidence_entropy": 0.48006873213412093, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.0625, "eval_calibration/coverage@20%": 0.0859375, "eval_calibration/coverage@25%": 0.1953125, "eval_calibration/coverage@30%": 0.2265625, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.19939102374750206, "eval_calibration/mean_confidence": 0.4764055658567398, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 371.0, "eval_completions/max_terminated_length": 371.0, "eval_completions/mean_length": 199.60984802246094, "eval_completions/mean_terminated_length": 199.60984802246094, "eval_completions/min_length": 117.75, "eval_completions/min_terminated_length": 117.75, "eval_loss": 0.0, "eval_num_tokens": 328168322.0, "eval_reward": 0.795561820268631, "eval_reward_std": 0.22068889066576958, "eval_rewards/accuracy_reward": 0.408203125, "eval_rewards/brier_reward": 0.8055337518453598, "eval_rewards/confidence_uniqueness_reward": 0.900390625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.2084694728255272, "eval_rewards/frontier_coverage_1": 0.2084694728255272, "eval_rewards/frontier_coverage_10": 0.2084694728255272, "eval_rewards/frontier_coverage_15": 0.2084694728255272, "eval_rewards/frontier_coverage_20": 0.2084694728255272, "eval_rewards/frontier_coverage_25": 0.2084694728255272, "eval_rewards/frontier_coverage_5": 0.2084694728255272, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 20.2095, "eval_samples_per_second": 24.741, "eval_signal/accuracy_reward/centered_abs_mean": 0.4696044921875, "eval_signal/accuracy_reward/group_std_mean": 0.4919809103012085, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0656675398349762, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23480224609375, "eval_signal/advantage_abs_mean": 0.9272723495960236, "eval_signal/advantage_pre_scale_abs_mean": 0.2049938254058361, "eval_signal/advantage_pre_scale_std": 0.21827252581715584, "eval_signal/advantage_std": 0.9876697510480881, "eval_signal/brier_reward/centered_abs_mean": 0.18734565749764442, "eval_signal/brier_reward/group_std_mean": 0.23995699733495712, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08503718301653862, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01873456547036767, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.037811279296875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.044189696200191975, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017147937789559364, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037811279762536287, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.35348744690418243, "eval_signal/frontier_coverage_0/group_std_mean": 0.42605313658714294, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.022959773894399405, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005054870503954589, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.35348744690418243, "eval_signal/frontier_coverage_1/group_std_mean": 0.42605313658714294, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.022959773894399405, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005054870503954589, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.35348744690418243, "eval_signal/frontier_coverage_10/group_std_mean": 0.42605313658714294, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.022959773894399405, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005054870503954589, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.35348744690418243, "eval_signal/frontier_coverage_15/group_std_mean": 0.42605313658714294, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.022959773894399405, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005054870503954589, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.35348744690418243, "eval_signal/frontier_coverage_20/group_std_mean": 0.42605313658714294, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022959773894399405, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005054870503954589, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.35348744690418243, "eval_signal/frontier_coverage_25/group_std_mean": 0.42605313658714294, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022959773894399405, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005054870503954589, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.35348744690418243, "eval_signal/frontier_coverage_5/group_std_mean": 0.42605313658714294, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.022959773894399405, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005054870503954589, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.198, "step": 100 }, { "calibration/aurc": 0.26602253089668076, "calibration/batch_distribution_entropy": 0.9677981082899623, "calibration/buffer_distribution_entropy": 0.9949084560096411, "calibration/confidence_entropy": 0.49570299589735656, "calibration/coverage@0%": 0.02149278375733855, "calibration/coverage@1%": 0.02149278375733855, "calibration/coverage@10%": 0.1090508806262231, "calibration/coverage@15%": 0.18800605430528378, "calibration/coverage@20%": 0.29751406555772997, "calibration/coverage@25%": 0.5710066046966732, "calibration/coverage@30%": 0.7069708598336595, "calibration/coverage@5%": 0.023055283757338552, "calibration/ece": 0.11665484961072996, "calibration/mean_confidence": 0.5079856328555454, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 564.6, "completions/max_terminated_length": 564.6, "completions/mean_length": 196.78076171875, "completions/mean_terminated_length": 196.8188690185547, "completions/min_length": 61.0, "completions/min_terminated_length": 100.4, "epoch": 0.336, "grad_norm": 0.008793617598712444, "learning_rate": 1e-06, "loss": 0.0022, "num_tokens": 344905789.0, "reward": 0.9403074264526368, "reward_std": 0.07877994924783707, "rewards/accuracy_reward": 0.55146484375, "rewards/brier_reward": 0.8012589335441589, "rewards/confidence_uniqueness_reward": 0.9518896460533142, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.10983360260725021, "rewards/frontier_coverage_1": 0.10983360260725021, "rewards/frontier_coverage_10": 0.10983360260725021, "rewards/frontier_coverage_15": 0.10983360260725021, "rewards/frontier_coverage_20": 0.10983360260725021, "rewards/frontier_coverage_25": 0.10983360260725021, "rewards/frontier_coverage_5": 0.10983360260725021, "rewards/frontier_entropy_batch_reward": -0.21636542975902556, "signal/accuracy_reward/centered_abs_mean": 0.087689208984375, "signal/accuracy_reward/group_std_mean": 0.11891601234674454, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.863071084022522, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0438446044921875, "signal/advantage_abs_mean": 0.7660103678703308, "signal/advantage_pre_scale_abs_mean": 0.060546606034040454, "signal/advantage_pre_scale_std": 0.0978748396039009, "signal/advantage_std": 0.9825198411941528, "signal/brier_reward/centered_abs_mean": 0.10040059238672257, "signal/brier_reward/group_std_mean": 0.1297599822282791, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20032111704349517, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010040059126913548, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012283951044082642, "signal/confidence_uniqueness_reward/group_std_mean": 0.015965880826115608, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02450355812907219, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012283950811251998, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0036628665402531624, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.14154843986034393, "signal/frontier_coverage_0/group_std_mean": 0.18509421646595, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0403674952685833, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020241427468135954, "signal/frontier_coverage_1/centered_abs_mean": 0.14154843986034393, "signal/frontier_coverage_1/group_std_mean": 0.18509421646595, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0403674952685833, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020241427468135954, "signal/frontier_coverage_10/centered_abs_mean": 0.14154843986034393, "signal/frontier_coverage_10/group_std_mean": 0.18509421646595, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0403674952685833, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020241427468135954, "signal/frontier_coverage_15/centered_abs_mean": 0.14154843986034393, "signal/frontier_coverage_15/group_std_mean": 0.18509421646595, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0403674952685833, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020241427468135954, "signal/frontier_coverage_20/centered_abs_mean": 0.14154843986034393, "signal/frontier_coverage_20/group_std_mean": 0.18509421646595, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0403674952685833, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020241427468135954, "signal/frontier_coverage_25/centered_abs_mean": 0.14154843986034393, "signal/frontier_coverage_25/group_std_mean": 0.18509421646595, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0403674952685833, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020241427468135954, "signal/frontier_coverage_5/centered_abs_mean": 0.14154843986034393, "signal/frontier_coverage_5/group_std_mean": 0.18509421646595, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0403674952685833, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020241427468135954, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2685790777206421, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3413450360298157, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5357294917106629, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026857908815145493, "step": 105 }, { "calibration/aurc": 0.25054426640565053, "calibration/batch_distribution_entropy": 0.9506281697531032, "calibration/buffer_distribution_entropy": 0.9976023879596688, "calibration/confidence_entropy": 0.43937590127320797, "calibration/coverage@0%": 0.118359375, "calibration/coverage@1%": 0.130859375, "calibration/coverage@10%": 0.30546875, "calibration/coverage@15%": 0.440625, "calibration/coverage@20%": 0.505078125, "calibration/coverage@25%": 0.551171875, "calibration/coverage@30%": 0.619140625, "calibration/coverage@5%": 0.195703125, "calibration/ece": 0.11887008576271958, "calibration/mean_confidence": 0.44458669144123214, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 562.2, "completions/max_terminated_length": 562.2, "completions/mean_length": 201.9111328125, "completions/mean_terminated_length": 201.95140380859374, "completions/min_length": 61.0, "completions/min_terminated_length": 101.8, "epoch": 0.352, "grad_norm": 0.01325867511332035, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 362233775.0, "reward": 0.9122628808021546, "reward_std": 0.0789569452404976, "rewards/accuracy_reward": 0.483984375, "rewards/brier_reward": 0.8145543575286865, "rewards/confidence_uniqueness_reward": 0.9494835615158081, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.17774035930633544, "rewards/frontier_coverage_1": 0.17774035930633544, "rewards/frontier_coverage_10": 0.17774035930633544, "rewards/frontier_coverage_15": 0.17774035930633544, "rewards/frontier_coverage_20": 0.17774035930633544, "rewards/frontier_coverage_25": 0.17774035930633544, "rewards/frontier_coverage_5": 0.17774035930633544, "rewards/frontier_entropy_batch_reward": -0.23778423070907592, "signal/accuracy_reward/centered_abs_mean": 0.09801025390625, "signal/accuracy_reward/group_std_mean": 0.12496584504842759, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0013089537620545, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049005126953125, "signal/advantage_abs_mean": 0.7665701508522034, "signal/advantage_pre_scale_abs_mean": 0.06204437762498856, "signal/advantage_pre_scale_std": 0.09929190725088119, "signal/advantage_std": 0.9824689507484436, "signal/brier_reward/centered_abs_mean": 0.10055015981197357, "signal/brier_reward/group_std_mean": 0.1297568753361702, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20597705543041228, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010055016353726387, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013859933055937291, "signal/confidence_uniqueness_reward/group_std_mean": 0.018241026997566225, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028573965653777122, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013859933242201805, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006001142412424087, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.1629865735769272, "signal/frontier_coverage_0/group_std_mean": 0.20776084065437317, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0477764330804348, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023307080380618574, "signal/frontier_coverage_1/centered_abs_mean": 0.1629865735769272, "signal/frontier_coverage_1/group_std_mean": 0.20776084065437317, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0477764330804348, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023307080380618574, "signal/frontier_coverage_10/centered_abs_mean": 0.1629865735769272, "signal/frontier_coverage_10/group_std_mean": 0.20776084065437317, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0477764330804348, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023307080380618574, "signal/frontier_coverage_15/centered_abs_mean": 0.1629865735769272, "signal/frontier_coverage_15/group_std_mean": 0.20776084065437317, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0477764330804348, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023307080380618574, "signal/frontier_coverage_20/centered_abs_mean": 0.1629865735769272, "signal/frontier_coverage_20/group_std_mean": 0.20776084065437317, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0477764330804348, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023307080380618574, "signal/frontier_coverage_25/centered_abs_mean": 0.1629865735769272, "signal/frontier_coverage_25/group_std_mean": 0.20776084065437317, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0477764330804348, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023307080380618574, "signal/frontier_coverage_5/centered_abs_mean": 0.1629865735769272, "signal/frontier_coverage_5/group_std_mean": 0.20776084065437317, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0477764330804348, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023307080380618574, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26890730261802676, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34364842176437377, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5524909615516662, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02689073123037815, "step": 110 }, { "calibration/aurc": 0.3163110406084484, "calibration/batch_distribution_entropy": 0.9615172344168172, "calibration/buffer_distribution_entropy": 0.9982005314375307, "calibration/confidence_entropy": 0.4628636321817998, "calibration/coverage@0%": 0.0390625, "calibration/coverage@1%": 0.0390625, "calibration/coverage@10%": 0.1140625, "calibration/coverage@15%": 0.169140625, "calibration/coverage@20%": 0.350390625, "calibration/coverage@25%": 0.41796875, "calibration/coverage@30%": 0.521484375, "calibration/coverage@5%": 0.073046875, "calibration/ece": 0.1247380959840438, "calibration/mean_confidence": 0.507100053732364, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 588.6, "completions/max_terminated_length": 588.6, "completions/mean_length": 209.62236328125, "completions/mean_terminated_length": 209.66268310546874, "completions/min_length": 64.4, "completions/min_terminated_length": 105.6, "epoch": 0.368, "grad_norm": 0.01056207250803709, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 379445780.0, "reward": 0.9280990719795227, "reward_std": 0.07095708847045898, "rewards/accuracy_reward": 0.52216796875, "rewards/brier_reward": 0.8045044064521789, "rewards/confidence_uniqueness_reward": 0.950655996799469, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.1437540665268898, "rewards/frontier_coverage_1": 0.1437540665268898, "rewards/frontier_coverage_10": 0.1437540665268898, "rewards/frontier_coverage_15": 0.1437540665268898, "rewards/frontier_coverage_20": 0.1437540665268898, "rewards/frontier_coverage_25": 0.14409504383802413, "rewards/frontier_coverage_5": 0.1437540665268898, "rewards/frontier_entropy_batch_reward": -0.22749127745628356, "signal/accuracy_reward/centered_abs_mean": 0.073712158203125, "signal/accuracy_reward/group_std_mean": 0.09910732954740524, "signal/accuracy_reward/group_zero_std_frac": 0.709375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7912806749343873, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0368560791015625, "signal/advantage_abs_mean": 0.772281539440155, "signal/advantage_pre_scale_abs_mean": 0.05444162786006927, "signal/advantage_pre_scale_std": 0.0894496574997902, "signal/advantage_std": 0.9823481917381287, "signal/brier_reward/centered_abs_mean": 0.09946328550577163, "signal/brier_reward/group_std_mean": 0.12772661596536636, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21652222871780397, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009946328960359097, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013414673879742623, "signal/confidence_uniqueness_reward/group_std_mean": 0.017754827439785004, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029049182683229445, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013414673740044236, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006210983730852604, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.14087859094142913, "signal/frontier_coverage_0/group_std_mean": 0.18012421131134032, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04363641962409019, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002014563884586096, "signal/frontier_coverage_1/centered_abs_mean": 0.14087859094142913, "signal/frontier_coverage_1/group_std_mean": 0.18012421131134032, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04363641962409019, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002014563884586096, "signal/frontier_coverage_10/centered_abs_mean": 0.14087859094142913, "signal/frontier_coverage_10/group_std_mean": 0.18012421131134032, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04363641962409019, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002014563884586096, "signal/frontier_coverage_15/centered_abs_mean": 0.14087859094142913, "signal/frontier_coverage_15/group_std_mean": 0.18012421131134032, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04363641962409019, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002014563884586096, "signal/frontier_coverage_20/centered_abs_mean": 0.14087859094142913, "signal/frontier_coverage_20/group_std_mean": 0.18012421131134032, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.04363641962409019, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002014563884586096, "signal/frontier_coverage_25/centered_abs_mean": 0.14017903208732604, "signal/frontier_coverage_25/group_std_mean": 0.17928515374660492, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0433915801346302, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020045602228492498, "signal/frontier_coverage_5/centered_abs_mean": 0.14087859094142913, "signal/frontier_coverage_5/group_std_mean": 0.18012421131134032, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04363641962409019, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002014563884586096, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2679985582828522, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34032227396965026, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5820306539535522, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026799855381250383, "step": 115 }, { "calibration/aurc": 0.291872629605271, "calibration/batch_distribution_entropy": 0.9695508342320689, "calibration/buffer_distribution_entropy": 0.9974123973699189, "calibration/confidence_entropy": 0.470467238089471, "calibration/coverage@0%": 0.108984375, "calibration/coverage@1%": 0.179296875, "calibration/coverage@10%": 0.296484375, "calibration/coverage@15%": 0.32578125, "calibration/coverage@20%": 0.3609375, "calibration/coverage@25%": 0.400390625, "calibration/coverage@30%": 0.451953125, "calibration/coverage@5%": 0.241015625, "calibration/ece": 0.16722453167166276, "calibration/mean_confidence": 0.4628005829226116, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 796.8, "completions/max_terminated_length": 796.8, "completions/mean_length": 212.99912109375, "completions/mean_terminated_length": 213.1254455566406, "completions/min_length": 22.4, "completions/min_terminated_length": 101.4, "epoch": 0.384, "grad_norm": 0.01185943465679884, "learning_rate": 1e-06, "loss": 0.0008, "num_tokens": 396483403.0, "reward": 0.9401529908180237, "reward_std": 0.07879967391490936, "rewards/accuracy_reward": 0.55224609375, "rewards/brier_reward": 0.8110327959060669, "rewards/confidence_uniqueness_reward": 0.9498059153556824, "rewards/format_reward": 0.9994140625, "rewards/frontier_coverage_0": 0.12273619621992111, "rewards/frontier_coverage_1": 0.12273619621992111, "rewards/frontier_coverage_10": 0.12273619621992111, "rewards/frontier_coverage_15": 0.12273619621992111, "rewards/frontier_coverage_20": 0.12227635085582733, "rewards/frontier_coverage_25": 0.11141111701726913, "rewards/frontier_coverage_5": 0.12273619621992111, "rewards/frontier_entropy_batch_reward": -0.23878300786018372, "signal/accuracy_reward/centered_abs_mean": 0.089276123046875, "signal/accuracy_reward/group_std_mean": 0.12189686745405197, "signal/accuracy_reward/group_zero_std_frac": 0.634375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8961179256439209, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0446380615234375, "signal/advantage_abs_mean": 0.7660392999649048, "signal/advantage_pre_scale_abs_mean": 0.06024746969342232, "signal/advantage_pre_scale_std": 0.0975036308169365, "signal/advantage_std": 0.9825122117996216, "signal/brier_reward/centered_abs_mean": 0.09333293437957764, "signal/brier_reward/group_std_mean": 0.12096812278032303, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18763849139213562, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009333293326199055, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013957409746944904, "signal/confidence_uniqueness_reward/group_std_mean": 0.01862836182117462, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02798389606177807, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013957409886643291, "signal/format_reward/centered_abs_mean": 0.00111083984375, "signal/format_reward/group_std_mean": 0.0026419460773468018, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0111533023416996, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000555419921875, "signal/frontier_coverage_0/centered_abs_mean": 0.14243515133857726, "signal/frontier_coverage_0/group_std_mean": 0.1850941926240921, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041002404689788816, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002036822633817792, "signal/frontier_coverage_1/centered_abs_mean": 0.14243515133857726, "signal/frontier_coverage_1/group_std_mean": 0.1850941926240921, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041002404689788816, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002036822633817792, "signal/frontier_coverage_10/centered_abs_mean": 0.14243515133857726, "signal/frontier_coverage_10/group_std_mean": 0.1850941926240921, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.041002404689788816, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002036822633817792, "signal/frontier_coverage_15/centered_abs_mean": 0.14243515133857726, "signal/frontier_coverage_15/group_std_mean": 0.1850941926240921, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.041002404689788816, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002036822633817792, "signal/frontier_coverage_20/centered_abs_mean": 0.14176848232746125, "signal/frontier_coverage_20/group_std_mean": 0.1842800945043564, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.040808319300413134, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020272892899811266, "signal/frontier_coverage_25/centered_abs_mean": 0.12744964957237243, "signal/frontier_coverage_25/group_std_mean": 0.16639121770858764, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03667210936546326, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018225299660116433, "signal/frontier_coverage_5/centered_abs_mean": 0.14243515133857726, "signal/frontier_coverage_5/group_std_mean": 0.1850941926240921, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041002404689788816, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002036822633817792, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27840029299259184, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34748801589012146, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5590726673603058, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02784002907574177, "step": 120 }, { "calibration/aurc": 0.37680810561623346, "calibration/batch_distribution_entropy": 0.977627751790393, "calibration/buffer_distribution_entropy": 0.9969336700930927, "calibration/confidence_entropy": 0.5016000149395057, "calibration/coverage@0%": 0.01691244452662722, "calibration/coverage@1%": 0.01691244452662722, "calibration/coverage@10%": 0.08933062130177515, "calibration/coverage@15%": 0.13830128205128206, "calibration/coverage@20%": 0.1840930103550296, "calibration/coverage@25%": 0.22992711415187378, "calibration/coverage@30%": 0.288245808678501, "calibration/coverage@5%": 0.03613011587771203, "calibration/ece": 0.13497199447987554, "calibration/mean_confidence": 0.5139445167093368, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 550.0, "completions/max_terminated_length": 550.0, "completions/mean_length": 213.1404296875, "completions/mean_terminated_length": 213.41405029296874, "completions/min_length": 66.8, "completions/min_terminated_length": 86.4, "epoch": 0.4, "grad_norm": 0.0097922058776021, "learning_rate": 1e-06, "loss": -0.006, "num_tokens": 413702409.0, "reward": 0.9193836450576782, "reward_std": 0.09198853373527527, "rewards/accuracy_reward": 0.523828125, "rewards/brier_reward": 0.7929557800292969, "rewards/confidence_uniqueness_reward": 0.947076940536499, "rewards/format_reward": 0.9984375, "rewards/frontier_coverage_0": 0.12256665341556072, "rewards/frontier_coverage_1": 0.12256665341556072, "rewards/frontier_coverage_10": 0.12256665341556072, "rewards/frontier_coverage_15": 0.12178453020751476, "rewards/frontier_coverage_20": 0.119577931240201, "rewards/frontier_coverage_25": 0.10327411331236362, "rewards/frontier_coverage_5": 0.12256665341556072, "rewards/frontier_entropy_batch_reward": -0.27691553235054017, "signal/accuracy_reward/centered_abs_mean": 0.10191650390625, "signal/accuracy_reward/group_std_mean": 0.1383904129266739, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8994345784187316, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.050958251953125, "signal/advantage_abs_mean": 0.7682394504547119, "signal/advantage_pre_scale_abs_mean": 0.07057161033153533, "signal/advantage_pre_scale_std": 0.11377080827951432, "signal/advantage_std": 0.9827415585517884, "signal/brier_reward/centered_abs_mean": 0.10817324370145798, "signal/brier_reward/group_std_mean": 0.13862178921699525, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19183254837989808, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010817324556410313, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016499792411923408, "signal/confidence_uniqueness_reward/group_std_mean": 0.022187639772892, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029168443754315376, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016499792458489538, "signal/format_reward/centered_abs_mean": 0.00272216796875, "signal/format_reward/group_std_mean": 0.0051541978027671576, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02258917409926653, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001361083984375, "signal/frontier_coverage_0/centered_abs_mean": 0.13868848383426666, "signal/frontier_coverage_0/group_std_mean": 0.18039654791355134, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03508671894669533, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001983245322480798, "signal/frontier_coverage_1/centered_abs_mean": 0.13868848383426666, "signal/frontier_coverage_1/group_std_mean": 0.18039654791355134, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03508671894669533, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001983245322480798, "signal/frontier_coverage_10/centered_abs_mean": 0.13868848383426666, "signal/frontier_coverage_10/group_std_mean": 0.18039654791355134, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03508671894669533, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001983245322480798, "signal/frontier_coverage_15/centered_abs_mean": 0.13756284713745118, "signal/frontier_coverage_15/group_std_mean": 0.17895146012306212, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.034796612709760665, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019671486690640448, "signal/frontier_coverage_20/centered_abs_mean": 0.1341120943427086, "signal/frontier_coverage_20/group_std_mean": 0.17450543940067292, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03391275852918625, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019178029382601381, "signal/frontier_coverage_25/centered_abs_mean": 0.10924447625875473, "signal/frontier_coverage_25/group_std_mean": 0.14243850409984588, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02756657600402832, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015621959697455167, "signal/frontier_coverage_5/centered_abs_mean": 0.13868848383426666, "signal/frontier_coverage_5/group_std_mean": 0.18039654791355134, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03508671894669533, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001983245322480798, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30260345339775085, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37170406579971316, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.538099491596222, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030260344967246056, "step": 125 }, { "calibration/aurc": 0.2657363718758238, "calibration/batch_distribution_entropy": 0.9714411064590933, "calibration/buffer_distribution_entropy": 0.9981619683864125, "calibration/confidence_entropy": 0.4767882249726864, "calibration/coverage@0%": 0.015234375, "calibration/coverage@1%": 0.015234375, "calibration/coverage@10%": 0.119140625, "calibration/coverage@15%": 0.19296875, "calibration/coverage@20%": 0.306640625, "calibration/coverage@25%": 0.56015625, "calibration/coverage@30%": 0.67890625, "calibration/coverage@5%": 0.0484375, "calibration/ece": 0.11560704005460878, "calibration/mean_confidence": 0.5249319950650804, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 555.0, "completions/max_terminated_length": 555.0, "completions/mean_length": 218.605078125, "completions/mean_terminated_length": 218.64696350097657, "completions/min_length": 69.6, "completions/min_terminated_length": 92.0, "epoch": 0.416, "grad_norm": 0.01152903400361538, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 430822109.0, "reward": 0.9380086183547973, "reward_std": 0.07931896895170212, "rewards/accuracy_reward": 0.54072265625, "rewards/brier_reward": 0.814470624923706, "rewards/confidence_uniqueness_reward": 0.9511290192604065, "rewards/format_reward": 0.999609375, "rewards/frontier_coverage_0": 0.13615846037864685, "rewards/frontier_coverage_1": 0.13615846037864685, "rewards/frontier_coverage_10": 0.13615846037864685, "rewards/frontier_coverage_15": 0.1354565665125847, "rewards/frontier_coverage_20": 0.13337061703205108, "rewards/frontier_coverage_25": 0.11261514723300933, "rewards/frontier_coverage_5": 0.13615846037864685, "rewards/frontier_entropy_batch_reward": -0.21960244774818422, "signal/accuracy_reward/centered_abs_mean": 0.086480712890625, "signal/accuracy_reward/group_std_mean": 0.11708700507879258, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8494049906730652, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0432403564453125, "signal/advantage_abs_mean": 0.7665403366088868, "signal/advantage_pre_scale_abs_mean": 0.06138978749513626, "signal/advantage_pre_scale_std": 0.10005667060613632, "signal/advantage_std": 0.9825314521789551, "signal/brier_reward/centered_abs_mean": 0.09667001217603684, "signal/brier_reward/group_std_mean": 0.12675763815641403, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19187456667423247, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009667001478374005, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013016079366207123, "signal/confidence_uniqueness_reward/group_std_mean": 0.016788151860237122, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02605282999575138, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013016079319640994, "signal/format_reward/centered_abs_mean": 0.00072021484375, "signal/format_reward/group_std_mean": 0.0014778789598494768, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007402191683650017, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000360107421875, "signal/frontier_coverage_0/centered_abs_mean": 0.14196341782808303, "signal/frontier_coverage_0/group_std_mean": 0.18739462196826934, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039983388781547544, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002030076924711466, "signal/frontier_coverage_1/centered_abs_mean": 0.14196341782808303, "signal/frontier_coverage_1/group_std_mean": 0.18739462196826934, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039983388781547544, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002030076924711466, "signal/frontier_coverage_10/centered_abs_mean": 0.14196341782808303, "signal/frontier_coverage_10/group_std_mean": 0.18739462196826934, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039983388781547544, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002030076924711466, "signal/frontier_coverage_15/centered_abs_mean": 0.14051727205514908, "signal/frontier_coverage_15/group_std_mean": 0.1854836732149124, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0395765632390976, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020093969767913224, "signal/frontier_coverage_20/centered_abs_mean": 0.13647186160087585, "signal/frontier_coverage_20/group_std_mean": 0.1800833076238632, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03843574151396752, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019515476189553738, "signal/frontier_coverage_25/centered_abs_mean": 0.10870475172996522, "signal/frontier_coverage_25/group_std_mean": 0.1432620793581009, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030629120394587518, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015544779598712921, "signal/frontier_coverage_5/centered_abs_mean": 0.14196341782808303, "signal/frontier_coverage_5/group_std_mean": 0.18739462196826934, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.039983388781547544, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002030076924711466, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27345497012138364, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3447937786579132, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.545515489578247, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027345497533679007, "step": 130 }, { "calibration/aurc": 0.2394546361506189, "calibration/batch_distribution_entropy": 0.9664220854755664, "calibration/buffer_distribution_entropy": 0.9978692133192111, "calibration/confidence_entropy": 0.44952975490033015, "calibration/coverage@0%": 0.029296875, "calibration/coverage@1%": 0.029296875, "calibration/coverage@10%": 0.223046875, "calibration/coverage@15%": 0.2796875, "calibration/coverage@20%": 0.430078125, "calibration/coverage@25%": 0.510546875, "calibration/coverage@30%": 0.701171875, "calibration/coverage@5%": 0.112890625, "calibration/ece": 0.13110902721049844, "calibration/mean_confidence": 0.5009625814429861, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 600.2, "completions/max_terminated_length": 600.2, "completions/mean_length": 220.32685546875, "completions/mean_terminated_length": 220.3916229248047, "completions/min_length": 44.4, "completions/min_terminated_length": 109.8, "epoch": 0.432, "grad_norm": 0.014838258735835552, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 448092592.0, "reward": 0.9469399571418762, "reward_std": 0.07579587548971176, "rewards/accuracy_reward": 0.56298828125, "rewards/brier_reward": 0.8220736503601074, "rewards/confidence_uniqueness_reward": 0.9500454664230347, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.1361553490161896, "rewards/frontier_coverage_1": 0.1361553490161896, "rewards/frontier_coverage_10": 0.13602706864476205, "rewards/frontier_coverage_15": 0.13480819016695023, "rewards/frontier_coverage_20": 0.13133834376931192, "rewards/frontier_coverage_25": 0.10667677372694015, "rewards/frontier_coverage_5": 0.1361553490161896, "rewards/frontier_entropy_batch_reward": -0.24737223386764526, "signal/accuracy_reward/centered_abs_mean": 0.086553955078125, "signal/accuracy_reward/group_std_mean": 0.11445238739252091, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9037783741950989, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0432769775390625, "signal/advantage_abs_mean": 0.7740630865097046, "signal/advantage_pre_scale_abs_mean": 0.05964174121618271, "signal/advantage_pre_scale_std": 0.09494355022907257, "signal/advantage_std": 0.9824240684509278, "signal/brier_reward/centered_abs_mean": 0.09422143697738647, "signal/brier_reward/group_std_mean": 0.12117904126644134, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19863314628601075, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009422143734991551, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013962790369987488, "signal/confidence_uniqueness_reward/group_std_mean": 0.018255869299173354, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029283100739121437, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013962790602818132, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00573458094149828, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.1540747344493866, "signal/frontier_coverage_0/group_std_mean": 0.19681704938411712, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04627474918961525, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022032687440514563, "signal/frontier_coverage_1/centered_abs_mean": 0.1540747344493866, "signal/frontier_coverage_1/group_std_mean": 0.19681704938411712, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04627474918961525, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022032687440514563, "signal/frontier_coverage_10/centered_abs_mean": 0.1538453459739685, "signal/frontier_coverage_10/group_std_mean": 0.1965191125869751, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.046209176629781724, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002199988439679146, "signal/frontier_coverage_15/centered_abs_mean": 0.15173088908195495, "signal/frontier_coverage_15/group_std_mean": 0.19380154609680175, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.045577727258205414, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002169751701876521, "signal/frontier_coverage_20/centered_abs_mean": 0.1440102219581604, "signal/frontier_coverage_20/group_std_mean": 0.18400128185749054, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.043285074084997176, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020593460416421295, "signal/frontier_coverage_25/centered_abs_mean": 0.10412507951259613, "signal/frontier_coverage_25/group_std_mean": 0.13293323963880538, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03133079074323177, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014889885671436787, "signal/frontier_coverage_5/centered_abs_mean": 0.1540747344493866, "signal/frontier_coverage_5/group_std_mean": 0.19681704938411712, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04627474918961525, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022032687440514563, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2915738165378571, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36391377449035645, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6120963454246521, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029157382622361182, "step": 135 }, { "calibration/aurc": 0.24097328644526012, "calibration/batch_distribution_entropy": 0.9634986379181794, "calibration/buffer_distribution_entropy": 0.997582320879768, "calibration/confidence_entropy": 0.49413236744092554, "calibration/coverage@0%": 0.026953125, "calibration/coverage@1%": 0.026953125, "calibration/coverage@10%": 0.084375, "calibration/coverage@15%": 0.22353458292563602, "calibration/coverage@20%": 0.41814839163405093, "calibration/coverage@25%": 0.5553043970156556, "calibration/coverage@30%": 0.7393208781800391, "calibration/coverage@5%": 0.04921875, "calibration/ece": 0.09394564360698121, "calibration/mean_confidence": 0.5647713994787259, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 575.0, "completions/max_terminated_length": 575.0, "completions/mean_length": 226.0078125, "completions/mean_terminated_length": 226.18872985839843, "completions/min_length": 41.4, "completions/min_terminated_length": 103.4, "epoch": 0.448, "grad_norm": 0.014536180533468723, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 465359712.0, "reward": 0.934195339679718, "reward_std": 0.08252922743558884, "rewards/accuracy_reward": 0.5310546875, "rewards/brier_reward": 0.8201260566711426, "rewards/confidence_uniqueness_reward": 0.950800085067749, "rewards/format_reward": 0.998828125, "rewards/frontier_coverage_0": 0.14471644312143325, "rewards/frontier_coverage_1": 0.14471644312143325, "rewards/frontier_coverage_10": 0.14471644312143325, "rewards/frontier_coverage_15": 0.14337524473667146, "rewards/frontier_coverage_20": 0.13384944051504136, "rewards/frontier_coverage_25": 0.10090558081865311, "rewards/frontier_coverage_5": 0.14471644312143325, "rewards/frontier_entropy_batch_reward": -0.21523725092411042, "signal/accuracy_reward/centered_abs_mean": 0.091162109375, "signal/accuracy_reward/group_std_mean": 0.11895354390144348, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8764581441879272, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0455810546875, "signal/advantage_abs_mean": 0.7745669841766357, "signal/advantage_pre_scale_abs_mean": 0.06429816111922264, "signal/advantage_pre_scale_std": 0.10496852099895478, "signal/advantage_std": 0.9825978994369506, "signal/brier_reward/centered_abs_mean": 0.09663276970386506, "signal/brier_reward/group_std_mean": 0.12589550763368607, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18589994609355925, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009663277119398118, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014237134903669357, "signal/confidence_uniqueness_reward/group_std_mean": 0.019735709950327873, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0274048775434494, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014237135415896772, "signal/format_reward/centered_abs_mean": 0.00208740234375, "signal/format_reward/group_std_mean": 0.004553806036710739, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.020145339518785478, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001043701171875, "signal/frontier_coverage_0/centered_abs_mean": 0.1478554666042328, "signal/frontier_coverage_0/group_std_mean": 0.18775065541267394, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.040648031234741214, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002114333095960319, "signal/frontier_coverage_1/centered_abs_mean": 0.1478554666042328, "signal/frontier_coverage_1/group_std_mean": 0.18775065541267394, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.040648031234741214, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002114333095960319, "signal/frontier_coverage_10/centered_abs_mean": 0.1478554666042328, "signal/frontier_coverage_10/group_std_mean": 0.18775065541267394, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.040648031234741214, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002114333095960319, "signal/frontier_coverage_15/centered_abs_mean": 0.14521766006946563, "signal/frontier_coverage_15/group_std_mean": 0.18448179364204406, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.039922721683979034, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020766125060617925, "signal/frontier_coverage_20/centered_abs_mean": 0.12875569313764573, "signal/frontier_coverage_20/group_std_mean": 0.16414850652217866, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035395897924900055, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00184120642952621, "signal/frontier_coverage_25/centered_abs_mean": 0.08961157202720642, "signal/frontier_coverage_25/group_std_mean": 0.11496616899967194, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0246336467564106, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012814455199986695, "signal/frontier_coverage_5/centered_abs_mean": 0.1478554666042328, "signal/frontier_coverage_5/group_std_mean": 0.18775065541267394, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.040648031234741214, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002114333095960319, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27715436220169065, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3507562756538391, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5331802070140839, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027715435624122618, "step": 140 }, { "calibration/aurc": 0.34746743831006704, "calibration/batch_distribution_entropy": 0.9729663060399908, "calibration/buffer_distribution_entropy": 0.9974155039324156, "calibration/confidence_entropy": 0.5069634313783685, "calibration/coverage@0%": 0.007032778864970645, "calibration/coverage@1%": 0.007032778864970645, "calibration/coverage@10%": 0.007814028864970646, "calibration/coverage@15%": 0.050782778864970644, "calibration/coverage@20%": 0.11487432729941291, "calibration/coverage@25%": 0.3137460249510763, "calibration/coverage@30%": 0.44777091487279846, "calibration/coverage@5%": 0.007032778864970645, "calibration/ece": 0.12659113833545074, "calibration/mean_confidence": 0.48351704059525724, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 612.2, "completions/max_terminated_length": 612.2, "completions/mean_length": 226.98642578125, "completions/mean_terminated_length": 227.0522918701172, "completions/min_length": 42.8, "completions/min_terminated_length": 108.8, "epoch": 0.464, "grad_norm": 0.011709939688444138, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 482854869.0, "reward": 0.9039804100990295, "reward_std": 0.07382192313671113, "rewards/accuracy_reward": 0.47568359375, "rewards/brier_reward": 0.7968503117561341, "rewards/confidence_uniqueness_reward": 0.9505188941955567, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.158881214261055, "rewards/frontier_coverage_1": 0.158881214261055, "rewards/frontier_coverage_10": 0.158881214261055, "rewards/frontier_coverage_15": 0.1568503975868225, "rewards/frontier_coverage_20": 0.13968808948993683, "rewards/frontier_coverage_25": 0.09043478444218636, "rewards/frontier_coverage_5": 0.158881214261055, "rewards/frontier_entropy_batch_reward": -0.2307354539632797, "signal/accuracy_reward/centered_abs_mean": 0.072039794921875, "signal/accuracy_reward/group_std_mean": 0.09741799160838127, "signal/accuracy_reward/group_zero_std_frac": 0.7125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7294652104377747, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0360198974609375, "signal/advantage_abs_mean": 0.765180766582489, "signal/advantage_pre_scale_abs_mean": 0.05663144513964653, "signal/advantage_pre_scale_std": 0.09198210388422012, "signal/advantage_std": 0.9824758410453797, "signal/brier_reward/centered_abs_mean": 0.10376063138246536, "signal/brier_reward/group_std_mean": 0.1336098790168762, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21310822963714598, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010376062802970409, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013512828387320042, "signal/confidence_uniqueness_reward/group_std_mean": 0.017725981958210468, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02775384560227394, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001351282838732004, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005904573574662208, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.14577461481094361, "signal/frontier_coverage_0/group_std_mean": 0.18616759181022643, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.042809315770864484, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020845770370215176, "signal/frontier_coverage_1/centered_abs_mean": 0.14577461481094361, "signal/frontier_coverage_1/group_std_mean": 0.18616759181022643, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.042809315770864484, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020845770370215176, "signal/frontier_coverage_10/centered_abs_mean": 0.14577461481094361, "signal/frontier_coverage_10/group_std_mean": 0.18616759181022643, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.042809315770864484, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020845770370215176, "signal/frontier_coverage_15/centered_abs_mean": 0.1432782530784607, "signal/frontier_coverage_15/group_std_mean": 0.1829966723918915, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04207362085580826, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002048878977075219, "signal/frontier_coverage_20/centered_abs_mean": 0.12409319430589676, "signal/frontier_coverage_20/group_std_mean": 0.15879313945770263, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03639562539756298, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017745327437296509, "signal/frontier_coverage_25/centered_abs_mean": 0.08080256581306458, "signal/frontier_coverage_25/group_std_mean": 0.10407637059688568, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023712591081857682, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011554765980690718, "signal/frontier_coverage_5/centered_abs_mean": 0.14577461481094361, "signal/frontier_coverage_5/group_std_mean": 0.18616759181022643, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.042809315770864484, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020845770370215176, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27938825488090513, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3512773871421814, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.568844985961914, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027938826382160185, "step": 145 }, { "calibration/aurc": 0.2291198538847199, "calibration/batch_distribution_entropy": 0.9672892013539134, "calibration/buffer_distribution_entropy": 0.9967666576020949, "calibration/confidence_entropy": 0.4510841710064442, "calibration/coverage@0%": 0.040251192514677105, "calibration/coverage@1%": 0.040251192514677105, "calibration/coverage@10%": 0.19861867049902152, "calibration/coverage@15%": 0.3887743089530332, "calibration/coverage@20%": 0.45448492539138946, "calibration/coverage@25%": 0.5873960371819961, "calibration/coverage@30%": 0.7125351638943249, "calibration/coverage@5%": 0.0930390777886497, "calibration/ece": 0.12407148786161874, "calibration/mean_confidence": 0.4931261034620563, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 617.6, "completions/max_terminated_length": 617.6, "completions/mean_length": 216.017578125, "completions/mean_terminated_length": 216.14576416015626, "completions/min_length": 40.8, "completions/min_terminated_length": 106.8, "epoch": 0.48, "grad_norm": 0.017295807600021362, "learning_rate": 1e-06, "loss": 0.0016, "num_tokens": 500114921.0, "reward": 0.9307031989097595, "reward_std": 0.08427495062351227, "rewards/accuracy_reward": 0.53408203125, "rewards/brier_reward": 0.8068124175071716, "rewards/confidence_uniqueness_reward": 0.949408769607544, "rewards/format_reward": 0.9994140625, "rewards/frontier_coverage_0": 0.14723927676677703, "rewards/frontier_coverage_1": 0.14723927676677703, "rewards/frontier_coverage_10": 0.14699542224407197, "rewards/frontier_coverage_15": 0.14517690539360045, "rewards/frontier_coverage_20": 0.12480606287717819, "rewards/frontier_coverage_25": 0.08327750265598297, "rewards/frontier_coverage_5": 0.14723927676677703, "rewards/frontier_entropy_batch_reward": -0.25137184262275697, "signal/accuracy_reward/centered_abs_mean": 0.104425048828125, "signal/accuracy_reward/group_std_mean": 0.13805123120546342, "signal/accuracy_reward/group_zero_std_frac": 0.603125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0096543788909913, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0522125244140625, "signal/advantage_abs_mean": 0.7595040321350097, "signal/advantage_pre_scale_abs_mean": 0.06471362709999084, "signal/advantage_pre_scale_std": 0.1039919227361679, "signal/advantage_std": 0.9825853824615478, "signal/brier_reward/centered_abs_mean": 0.10939399302005767, "signal/brier_reward/group_std_mean": 0.1398113638162613, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21181057393550873, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010939398780465126, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01502783726900816, "signal/confidence_uniqueness_reward/group_std_mean": 0.02021808587014675, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02905021458864212, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015027837362140417, "signal/format_reward/centered_abs_mean": 0.001123046875, "signal/format_reward/group_std_mean": 0.0029782545287162067, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010594680532813072, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005615234375, "signal/frontier_coverage_0/centered_abs_mean": 0.17547143697738649, "signal/frontier_coverage_0/group_std_mean": 0.22376441955566406, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04860707297921181, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025092415045946835, "signal/frontier_coverage_1/centered_abs_mean": 0.17547143697738649, "signal/frontier_coverage_1/group_std_mean": 0.22376441955566406, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04860707297921181, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025092415045946835, "signal/frontier_coverage_10/centered_abs_mean": 0.175068262219429, "signal/frontier_coverage_10/group_std_mean": 0.22327833473682404, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.048494862765073775, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025034761521965264, "signal/frontier_coverage_15/centered_abs_mean": 0.17082957327365875, "signal/frontier_coverage_15/group_std_mean": 0.21801035106182098, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.04731718450784683, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024428628385066986, "signal/frontier_coverage_20/centered_abs_mean": 0.13655193746089936, "signal/frontier_coverage_20/group_std_mean": 0.1751394361257553, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03779491558670998, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001952692656777799, "signal/frontier_coverage_25/centered_abs_mean": 0.07820582389831543, "signal/frontier_coverage_25/group_std_mean": 0.1008089080452919, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021633072569966317, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011183432419784368, "signal/frontier_coverage_5/centered_abs_mean": 0.17547143697738649, "signal/frontier_coverage_5/group_std_mean": 0.22376441955566406, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04860707297921181, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025092415045946835, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29524141550064087, "signal/frontier_entropy_batch_reward/group_std_mean": 0.366878867149353, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.570873761177063, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029524140805006028, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.45534531043154974, "eval_calibration/batch_distribution_entropy": 0.926511097298464, "eval_calibration/buffer_distribution_entropy": 0.9962565022453381, "eval_calibration/confidence_entropy": 0.4713693076099079, "eval_calibration/coverage@0%": 0.109375, "eval_calibration/coverage@1%": 0.109375, "eval_calibration/coverage@10%": 0.140625, "eval_calibration/coverage@15%": 0.140625, "eval_calibration/coverage@20%": 0.171875, "eval_calibration/coverage@25%": 0.2265625, "eval_calibration/coverage@30%": 0.2578125, "eval_calibration/coverage@5%": 0.109375, "eval_calibration/ece": 0.16938003565903828, "eval_calibration/mean_confidence": 0.4290637407118039, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 378.5, "eval_completions/max_terminated_length": 378.5, "eval_completions/mean_length": 212.3056983947754, "eval_completions/mean_terminated_length": 212.3056983947754, "eval_completions/min_length": 127.75, "eval_completions/min_terminated_length": 127.75, "eval_loss": 0.0, "eval_num_tokens": 500114921.0, "eval_reward": 0.8005315959453583, "eval_reward_std": 0.21906593441963196, "eval_rewards/accuracy_reward": 0.421875, "eval_rewards/brier_reward": 0.8141729980707169, "eval_rewards/confidence_uniqueness_reward": 0.89990234375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.21159575507044792, "eval_rewards/frontier_coverage_1": 0.21159575507044792, "eval_rewards/frontier_coverage_10": 0.2111768200993538, "eval_rewards/frontier_coverage_15": 0.2013298012316227, "eval_rewards/frontier_coverage_20": 0.14685893058776855, "eval_rewards/frontier_coverage_25": 0.07763573154807091, "eval_rewards/frontier_coverage_5": 0.21159575507044792, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 20.4621, "eval_samples_per_second": 24.435, "eval_signal/accuracy_reward/centered_abs_mean": 0.467529296875, "eval_signal/accuracy_reward/group_std_mean": 0.49060849100351334, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0691617131233215, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2337646484375, "eval_signal/advantage_abs_mean": 0.9310326725244522, "eval_signal/advantage_pre_scale_abs_mean": 0.2045309916138649, "eval_signal/advantage_pre_scale_std": 0.21670874953269958, "eval_signal/advantage_std": 0.9876660853624344, "eval_signal/brier_reward/centered_abs_mean": 0.1706998273730278, "eval_signal/brier_reward/group_std_mean": 0.21492478251457214, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07832564786076546, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01706998236477375, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.03839111328125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04556787060573697, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017674416303634644, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0038391113048419356, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3698278069496155, "eval_signal/frontier_coverage_0/group_std_mean": 0.44138026237487793, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.024246441666036844, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005288537475280464, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3698278069496155, "eval_signal/frontier_coverage_1/group_std_mean": 0.44138026237487793, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.024246441666036844, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005288537475280464, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3684462159872055, "eval_signal/frontier_coverage_10/group_std_mean": 0.43977469205856323, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.024155837018042803, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005268780863843858, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.35635514557361603, "eval_signal/frontier_coverage_15/group_std_mean": 0.42567581683397293, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.023364387918263674, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00509587861597538, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.26486819609999657, "eval_signal/frontier_coverage_20/group_std_mean": 0.32019487768411636, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01737035741098225, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003787615045439452, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.12020084448158741, "eval_signal/frontier_coverage_25/group_std_mean": 0.14966701343655586, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.007880826713517308, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017188721103593707, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3698278069496155, "eval_signal/frontier_coverage_5/group_std_mean": 0.44138026237487793, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.024246441666036844, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005288537475280464, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.195, "step": 150 }, { "calibration/aurc": 0.3319640447041089, "calibration/batch_distribution_entropy": 0.9701258176485588, "calibration/buffer_distribution_entropy": 0.9959642411136971, "calibration/confidence_entropy": 0.46898921326255544, "calibration/coverage@0%": 0.010546875, "calibration/coverage@1%": 0.010546875, "calibration/coverage@10%": 0.173046875, "calibration/coverage@15%": 0.216015625, "calibration/coverage@20%": 0.25234375, "calibration/coverage@25%": 0.371484375, "calibration/coverage@30%": 0.487109375, "calibration/coverage@5%": 0.084765625, "calibration/ece": 0.1387425856761773, "calibration/mean_confidence": 0.48812744764793126, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 518.8, "completions/max_terminated_length": 518.8, "completions/mean_length": 208.6595703125, "completions/mean_terminated_length": 208.6799102783203, "completions/min_length": 88.4, "completions/min_terminated_length": 109.2, "epoch": 0.496, "grad_norm": 0.013091221451759338, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 517559435.0, "reward": 0.9493598937988281, "reward_std": 0.07383857369422912, "rewards/accuracy_reward": 0.57119140625, "rewards/brier_reward": 0.8089569687843323, "rewards/confidence_uniqueness_reward": 0.9516295194625854, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.10690305382013321, "rewards/frontier_coverage_1": 0.10690305382013321, "rewards/frontier_coverage_10": 0.10668100267648697, "rewards/frontier_coverage_15": 0.10049253255128861, "rewards/frontier_coverage_20": 0.07734967768192291, "rewards/frontier_coverage_25": 0.05591387003660202, "rewards/frontier_coverage_5": 0.10690305382013321, "rewards/frontier_entropy_batch_reward": -0.21602373123168944, "signal/accuracy_reward/centered_abs_mean": 0.072503662109375, "signal/accuracy_reward/group_std_mean": 0.10430038273334503, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7448198437690735, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0362518310546875, "signal/advantage_abs_mean": 0.7576547145843506, "signal/advantage_pre_scale_abs_mean": 0.055284418165683746, "signal/advantage_pre_scale_std": 0.09131110310554505, "signal/advantage_std": 0.9824570298194886, "signal/brier_reward/centered_abs_mean": 0.09475551843643189, "signal/brier_reward/group_std_mean": 0.1237260028719902, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19527413845062255, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009475551731884479, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01260056346654892, "signal/confidence_uniqueness_reward/group_std_mean": 0.016603745333850384, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025821058079600334, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012600563932210207, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005250536277890206, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.13314241766929627, "signal/frontier_coverage_0/group_std_mean": 0.17539192140102386, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03925930708646774, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019039364997297525, "signal/frontier_coverage_1/centered_abs_mean": 0.13314241766929627, "signal/frontier_coverage_1/group_std_mean": 0.17539192140102386, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03925930708646774, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019039364997297525, "signal/frontier_coverage_10/centered_abs_mean": 0.1326947808265686, "signal/frontier_coverage_10/group_std_mean": 0.17478241324424743, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039127344638109206, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018975354731082916, "signal/frontier_coverage_15/centered_abs_mean": 0.12710427790880202, "signal/frontier_coverage_15/group_std_mean": 0.1676137626171112, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.037500803172588346, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018175912089645862, "signal/frontier_coverage_20/centered_abs_mean": 0.09209925383329391, "signal/frontier_coverage_20/group_std_mean": 0.12147116661071777, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027178560569882392, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013170193182304502, "signal/frontier_coverage_25/centered_abs_mean": 0.055260706692934036, "signal/frontier_coverage_25/group_std_mean": 0.07163113951683045, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016299421340227126, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007902280893176794, "signal/frontier_coverage_5/centered_abs_mean": 0.13314241766929627, "signal/frontier_coverage_5/group_std_mean": 0.17539192140102386, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03925930708646774, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019039364997297525, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26992714703083037, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3414120674133301, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5531856119632721, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026992715150117873, "step": 155 }, { "calibration/aurc": 0.2318834156944694, "calibration/batch_distribution_entropy": 0.9758865214113979, "calibration/buffer_distribution_entropy": 0.9960580008108133, "calibration/confidence_entropy": 0.5127858783596624, "calibration/coverage@0%": 0.04804840386497065, "calibration/coverage@1%": 0.04804840386497065, "calibration/coverage@10%": 0.39586442025440316, "calibration/coverage@15%": 0.46035806017612524, "calibration/coverage@20%": 0.5201489114481409, "calibration/coverage@25%": 0.5729023972602739, "calibration/coverage@30%": 0.676439426369863, "calibration/coverage@5%": 0.2101577788649706, "calibration/ece": 0.13026999208636952, "calibration/mean_confidence": 0.5039464087666106, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 475.4, "completions/max_terminated_length": 475.4, "completions/mean_length": 201.43544921875, "completions/mean_terminated_length": 201.43544921875, "completions/min_length": 109.4, "completions/min_terminated_length": 109.4, "epoch": 0.512, "grad_norm": 0.01248211320489645, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 534767798.0, "reward": 0.9442027688026429, "reward_std": 0.07601016610860825, "rewards/accuracy_reward": 0.55, "rewards/brier_reward": 0.831080436706543, "rewards/confidence_uniqueness_reward": 0.9524983644485474, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.13253318071365355, "rewards/frontier_coverage_1": 0.13253318071365355, "rewards/frontier_coverage_10": 0.1322164461016655, "rewards/frontier_coverage_15": 0.1266620621085167, "rewards/frontier_coverage_20": 0.08994849994778634, "rewards/frontier_coverage_25": 0.058000007271766664, "rewards/frontier_coverage_5": 0.13251669555902482, "rewards/frontier_entropy_batch_reward": -0.20609368085861207, "signal/accuracy_reward/centered_abs_mean": 0.0776611328125, "signal/accuracy_reward/group_std_mean": 0.1082550346851349, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7978426694869996, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03883056640625, "signal/advantage_abs_mean": 0.7590952754020691, "signal/advantage_pre_scale_abs_mean": 0.05769476890563965, "signal/advantage_pre_scale_std": 0.09525633007287979, "signal/advantage_std": 0.9824547410011292, "signal/brier_reward/centered_abs_mean": 0.08847891539335251, "signal/brier_reward/group_std_mean": 0.11509132534265518, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18230343163013457, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.008847891353070736, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011811437085270882, "signal/confidence_uniqueness_reward/group_std_mean": 0.015052905678749085, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024400829523801803, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001181143708527088, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0018382035195827484, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1262390971183777, "signal/frontier_coverage_0/group_std_mean": 0.16516108214855194, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037380128353834155, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018052191007882357, "signal/frontier_coverage_1/centered_abs_mean": 0.1262390971183777, "signal/frontier_coverage_1/group_std_mean": 0.16516108214855194, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037380128353834155, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018052191007882357, "signal/frontier_coverage_10/centered_abs_mean": 0.12583804428577422, "signal/frontier_coverage_10/group_std_mean": 0.16464310884475708, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.037261802703142166, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017994840862229466, "signal/frontier_coverage_15/centered_abs_mean": 0.11806275993585587, "signal/frontier_coverage_15/group_std_mean": 0.15434040427207946, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0349416546523571, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016882974887266755, "signal/frontier_coverage_20/centered_abs_mean": 0.07932479679584503, "signal/frontier_coverage_20/group_std_mean": 0.10407408773899078, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02345772311091423, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011343445628881454, "signal/frontier_coverage_25/centered_abs_mean": 0.04835866242647171, "signal/frontier_coverage_25/group_std_mean": 0.062165239453315736, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01430096197873354, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006915288628078997, "signal/frontier_coverage_5/centered_abs_mean": 0.1262181043624878, "signal/frontier_coverage_5/group_std_mean": 0.1651339590549469, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03737393617630005, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001804918935522437, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2682627737522125, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3407862842082977, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5545450925827027, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02682627774775028, "step": 160 }, { "calibration/aurc": 0.19211939730195765, "calibration/batch_distribution_entropy": 0.9900490234949414, "calibration/buffer_distribution_entropy": 0.9960697114973808, "calibration/confidence_entropy": 0.48208753862210135, "calibration/coverage@0%": 0.042578125, "calibration/coverage@1%": 0.05625, "calibration/coverage@10%": 0.394921875, "calibration/coverage@15%": 0.476953125, "calibration/coverage@20%": 0.626171875, "calibration/coverage@25%": 0.705859375, "calibration/coverage@30%": 0.78203125, "calibration/coverage@5%": 0.172265625, "calibration/ece": 0.13546201731264892, "calibration/mean_confidence": 0.503985014979136, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 702.2, "completions/max_terminated_length": 702.2, "completions/mean_length": 200.08134765625, "completions/mean_terminated_length": 200.08134765625, "completions/min_length": 113.8, "completions/min_terminated_length": 113.8, "epoch": 0.528, "grad_norm": 0.013641457073390484, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 551846167.0, "reward": 0.9402725458145141, "reward_std": 0.0775743842124939, "rewards/accuracy_reward": 0.54189453125, "rewards/brier_reward": 0.8286908626556396, "rewards/confidence_uniqueness_reward": 0.9521965026855469, "rewards/format_reward": 1.0, "rewards/frontier_coverage_0": 0.15275048613548278, "rewards/frontier_coverage_1": 0.15275048613548278, "rewards/frontier_coverage_10": 0.1523078754544258, "rewards/frontier_coverage_15": 0.14277659058570863, "rewards/frontier_coverage_20": 0.10134280398488045, "rewards/frontier_coverage_25": 0.06609501764178276, "rewards/frontier_coverage_5": 0.15271973311901094, "rewards/frontier_entropy_batch_reward": -0.21930084824562074, "signal/accuracy_reward/centered_abs_mean": 0.089093017578125, "signal/accuracy_reward/group_std_mean": 0.12107728868722915, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9205329418182373, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0445465087890625, "signal/advantage_abs_mean": 0.7600311875343323, "signal/advantage_pre_scale_abs_mean": 0.05986330807209015, "signal/advantage_pre_scale_std": 0.09803989231586456, "signal/advantage_std": 0.9824413180351257, "signal/brier_reward/centered_abs_mean": 0.08963337987661361, "signal/brier_reward/group_std_mean": 0.11848148554563523, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18603391349315643, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0089633384719491, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011856555938720703, "signal/confidence_uniqueness_reward/group_std_mean": 0.014830333553254605, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024678315967321396, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011856555938720703, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_0/centered_abs_mean": 0.14773441553115846, "signal/frontier_coverage_0/group_std_mean": 0.19397561848163605, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04386069774627686, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021126021165400743, "signal/frontier_coverage_1/centered_abs_mean": 0.14773441553115846, "signal/frontier_coverage_1/group_std_mean": 0.19397561848163605, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04386069774627686, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021126021165400743, "signal/frontier_coverage_10/centered_abs_mean": 0.14719023555517197, "signal/frontier_coverage_10/group_std_mean": 0.19325293004512786, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.043698471039533615, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002104820357635617, "signal/frontier_coverage_15/centered_abs_mean": 0.13356127738952636, "signal/frontier_coverage_15/group_std_mean": 0.17507249414920806, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03960662260651589, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019099263940006495, "signal/frontier_coverage_20/centered_abs_mean": 0.08548935353755951, "signal/frontier_coverage_20/group_std_mean": 0.11230573058128357, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025287511199712752, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012224977603182197, "signal/frontier_coverage_25/centered_abs_mean": 0.05132223665714264, "signal/frontier_coverage_25/group_std_mean": 0.06637111082673072, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01524107065051794, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007339079747907817, "signal/frontier_coverage_5/centered_abs_mean": 0.14769805371761321, "signal/frontier_coverage_5/group_std_mean": 0.1939283013343811, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.043849749863147734, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021120821125805377, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2675458133220673, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34275596737861636, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5556636452674866, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026754581928253175, "step": 165 }, { "calibration/aurc": 0.20294509108922654, "calibration/batch_distribution_entropy": 0.9546776256074434, "calibration/buffer_distribution_entropy": 0.9954494699983767, "calibration/confidence_entropy": 0.43927290283615655, "calibration/coverage@0%": 0.02265625, "calibration/coverage@1%": 0.02265625, "calibration/coverage@10%": 0.183984375, "calibration/coverage@15%": 0.3640625, "calibration/coverage@20%": 0.64375, "calibration/coverage@25%": 0.7265625, "calibration/coverage@30%": 0.79375, "calibration/coverage@5%": 0.0859375, "calibration/ece": 0.08818259891898914, "calibration/mean_confidence": 0.5408140311130343, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 506.0, "completions/max_terminated_length": 506.0, "completions/mean_length": 199.5048828125, "completions/mean_terminated_length": 199.52403869628907, "completions/min_length": 93.0, "completions/min_terminated_length": 114.4, "epoch": 0.544, "grad_norm": 0.012110423296689987, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 569052681.0, "reward": 0.9582688570022583, "reward_std": 0.07946361750364303, "rewards/accuracy_reward": 0.59326171875, "rewards/brier_reward": 0.8115216851234436, "rewards/confidence_uniqueness_reward": 0.951054048538208, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.09490841627120972, "rewards/frontier_coverage_1": 0.09490841627120972, "rewards/frontier_coverage_10": 0.09457989484071731, "rewards/frontier_coverage_15": 0.08632062524557113, "rewards/frontier_coverage_20": 0.06573501899838448, "rewards/frontier_coverage_25": 0.06225412338972092, "rewards/frontier_coverage_5": 0.09490256607532502, "rewards/frontier_entropy_batch_reward": -0.23010531365871428, "signal/accuracy_reward/centered_abs_mean": 0.091876220703125, "signal/accuracy_reward/group_std_mean": 0.1262803852558136, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8946531057357788, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0459381103515625, "signal/advantage_abs_mean": 0.7608879446983338, "signal/advantage_pre_scale_abs_mean": 0.0603870801627636, "signal/advantage_pre_scale_std": 0.0982098788022995, "signal/advantage_std": 0.9825691699981689, "signal/brier_reward/centered_abs_mean": 0.0995179459452629, "signal/brier_reward/group_std_mean": 0.1290470004081726, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1940900981426239, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009951795265078545, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012855185754597187, "signal/confidence_uniqueness_reward/group_std_mean": 0.016486688517034054, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025106297805905342, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001285518566146493, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003780721127986908, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.14924255907535552, "signal/frontier_coverage_0/group_std_mean": 0.19254024922847748, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041698559373617175, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00213416856713593, "signal/frontier_coverage_1/centered_abs_mean": 0.14924255907535552, "signal/frontier_coverage_1/group_std_mean": 0.19254024922847748, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041698559373617175, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00213416856713593, "signal/frontier_coverage_10/centered_abs_mean": 0.14833201169967652, "signal/frontier_coverage_10/group_std_mean": 0.19136776328086852, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04144668877124787, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021211476530879738, "signal/frontier_coverage_15/centered_abs_mean": 0.132179157435894, "signal/frontier_coverage_15/group_std_mean": 0.17049570083618165, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.036941982060670855, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018901619594544172, "signal/frontier_coverage_20/centered_abs_mean": 0.08309006989002228, "signal/frontier_coverage_20/group_std_mean": 0.10664766877889634, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023256586492061616, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011881879763677716, "signal/frontier_coverage_25/centered_abs_mean": 0.054529760777950284, "signal/frontier_coverage_25/group_std_mean": 0.06911371499300004, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015250799432396888, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007797755766659975, "signal/frontier_coverage_5/centered_abs_mean": 0.149216166138649, "signal/frontier_coverage_5/group_std_mean": 0.19250675439834594, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.041691217571496964, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002133791148662567, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2766104400157928, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3481856346130371, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5387953042984008, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027661043778061868, "step": 170 }, { "calibration/aurc": 0.22822852984902148, "calibration/batch_distribution_entropy": 0.9655225762120716, "calibration/buffer_distribution_entropy": 0.9952540292473733, "calibration/confidence_entropy": 0.49669730637340487, "calibration/coverage@0%": 0.134765625, "calibration/coverage@1%": 0.2109375, "calibration/coverage@10%": 0.37265625, "calibration/coverage@15%": 0.41640625, "calibration/coverage@20%": 0.46875, "calibration/coverage@25%": 0.50703125, "calibration/coverage@30%": 0.580859375, "calibration/coverage@5%": 0.28359375, "calibration/ece": 0.13059685649729424, "calibration/mean_confidence": 0.4859837007115013, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 445.4, "completions/max_terminated_length": 445.4, "completions/mean_length": 201.94296875, "completions/mean_terminated_length": 202.03963623046874, "completions/min_length": 43.0, "completions/min_terminated_length": 110.0, "epoch": 0.56, "grad_norm": 0.011170346289873123, "learning_rate": 1e-06, "loss": -0.0019, "num_tokens": 585941985.0, "reward": 0.9336464762687683, "reward_std": 0.07471658736467361, "rewards/accuracy_reward": 0.537890625, "rewards/brier_reward": 0.8261852979660034, "rewards/confidence_uniqueness_reward": 0.9501984477043152, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.14562440067529678, "rewards/frontier_coverage_1": 0.14562440067529678, "rewards/frontier_coverage_10": 0.14465759322047234, "rewards/frontier_coverage_15": 0.12708725333213805, "rewards/frontier_coverage_20": 0.0821958489716053, "rewards/frontier_coverage_25": 0.05846571922302246, "rewards/frontier_coverage_5": 0.14558007940649986, "rewards/frontier_entropy_batch_reward": -0.24837148189544678, "signal/accuracy_reward/centered_abs_mean": 0.07193603515625, "signal/accuracy_reward/group_std_mean": 0.10199806988239288, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7364301919937134, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.035968017578125, "signal/advantage_abs_mean": 0.7664315819740295, "signal/advantage_pre_scale_abs_mean": 0.05653882250189781, "signal/advantage_pre_scale_std": 0.09310483485460282, "signal/advantage_std": 0.9824739694595337, "signal/brier_reward/centered_abs_mean": 0.08978293836116791, "signal/brier_reward/group_std_mean": 0.11686733067035675, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18375783562660217, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.008978294022381306, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013617974147200585, "signal/confidence_uniqueness_reward/group_std_mean": 0.018551425263285636, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027894172444939615, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013617974007502198, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009561251290142537, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_coverage_0/centered_abs_mean": 0.13698640018701552, "signal/frontier_coverage_0/group_std_mean": 0.1777627319097519, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04013899490237236, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001958905439823866, "signal/frontier_coverage_1/centered_abs_mean": 0.13698640018701552, "signal/frontier_coverage_1/group_std_mean": 0.1777627319097519, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04013899490237236, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001958905439823866, "signal/frontier_coverage_10/centered_abs_mean": 0.13580810874700547, "signal/frontier_coverage_10/group_std_mean": 0.17621684074401855, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03979276791214943, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019420559052377938, "signal/frontier_coverage_15/centered_abs_mean": 0.11728577166795731, "signal/frontier_coverage_15/group_std_mean": 0.15197210609912873, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03433753401041031, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016771864844486116, "signal/frontier_coverage_20/centered_abs_mean": 0.06963706314563751, "signal/frontier_coverage_20/group_std_mean": 0.09059495776891709, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020375318080186843, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000995810003951192, "signal/frontier_coverage_25/centered_abs_mean": 0.046510016173124315, "signal/frontier_coverage_25/group_std_mean": 0.06020733863115311, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013616615161299705, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006650932133197785, "signal/frontier_coverage_5/centered_abs_mean": 0.13694753050804137, "signal/frontier_coverage_5/group_std_mean": 0.17771164178848267, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04012744650244713, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019583496730774643, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2976150155067444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36509778499603274, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6098406314849854, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029761501774191856, "step": 175 }, { "calibration/aurc": 0.25513026872028377, "calibration/batch_distribution_entropy": 0.9604550999332429, "calibration/buffer_distribution_entropy": 0.9955296544792447, "calibration/confidence_entropy": 0.4595333190087679, "calibration/coverage@0%": 0.044537365459882586, "calibration/coverage@1%": 0.044537365459882586, "calibration/coverage@10%": 0.19186796722113503, "calibration/coverage@15%": 0.30011848703522503, "calibration/coverage@20%": 0.3857035836594912, "calibration/coverage@25%": 0.5373417624755381, "calibration/coverage@30%": 0.6647673067514678, "calibration/coverage@5%": 0.1257873654598826, "calibration/ece": 0.10435565679611791, "calibration/mean_confidence": 0.47617095896785244, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 481.8, "completions/max_terminated_length": 481.8, "completions/mean_length": 219.14482421875, "completions/mean_terminated_length": 219.14482421875, "completions/min_length": 99.0, "completions/min_terminated_length": 99.0, "epoch": 0.576, "grad_norm": 0.01333346776664257, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 603372652.0, "reward": 0.9337128639221192, "reward_std": 0.0706071525812149, "rewards/accuracy_reward": 0.53369140625, "rewards/brier_reward": 0.8254560589790344, "rewards/confidence_uniqueness_reward": 0.9495491623878479, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.15936579406261445, "rewards/frontier_coverage_1": 0.15936579406261445, "rewards/frontier_coverage_10": 0.15831765085458754, "rewards/frontier_coverage_15": 0.13857824504375457, "rewards/frontier_coverage_20": 0.09109884053468705, "rewards/frontier_coverage_25": 0.06888703480362893, "rewards/frontier_coverage_5": 0.15928825587034226, "rewards/frontier_entropy_batch_reward": -0.2395363688468933, "signal/accuracy_reward/centered_abs_mean": 0.070025634765625, "signal/accuracy_reward/group_std_mean": 0.09729954451322556, "signal/accuracy_reward/group_zero_std_frac": 0.696875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7560475468635559, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0350128173828125, "signal/advantage_abs_mean": 0.7645478129386902, "signal/advantage_pre_scale_abs_mean": 0.05406961366534233, "signal/advantage_pre_scale_std": 0.0894312784075737, "signal/advantage_std": 0.9823664426803589, "signal/brier_reward/centered_abs_mean": 0.08987597078084945, "signal/brier_reward/group_std_mean": 0.11730807423591613, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19417197108268738, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00898759663105011, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013608038239181042, "signal/confidence_uniqueness_reward/group_std_mean": 0.0172698387876153, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029590404033660887, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00136080386582762, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002120504714548588, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13798716366291047, "signal/frontier_coverage_0/group_std_mean": 0.17882009148597716, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04257337599992752, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001973216445185244, "signal/frontier_coverage_1/centered_abs_mean": 0.13798716366291047, "signal/frontier_coverage_1/group_std_mean": 0.17882009148597716, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04257337599992752, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001973216445185244, "signal/frontier_coverage_10/centered_abs_mean": 0.13663693964481355, "signal/frontier_coverage_10/group_std_mean": 0.17710677087306975, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.042156299203634263, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019539082422852516, "signal/frontier_coverage_15/centered_abs_mean": 0.11394921988248825, "signal/frontier_coverage_15/group_std_mean": 0.1480672240257263, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.035157003253698346, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016294738743454218, "signal/frontier_coverage_20/centered_abs_mean": 0.06810803413391113, "signal/frontier_coverage_20/group_std_mean": 0.08849272578954696, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021022461354732513, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009739449014887214, "signal/frontier_coverage_25/centered_abs_mean": 0.05104894489049912, "signal/frontier_coverage_25/group_std_mean": 0.06541921645402908, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015766990557312965, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007299999007955194, "signal/frontier_coverage_5/centered_abs_mean": 0.13789215981960296, "signal/frontier_coverage_5/group_std_mean": 0.17869631946086884, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04254399910569191, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001971857948228717, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27577903866767883, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34956675171852114, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5942099094390869, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027577904239296912, "step": 180 }, { "calibration/aurc": 0.2714459675058643, "calibration/batch_distribution_entropy": 0.9616355905909744, "calibration/buffer_distribution_entropy": 0.9950926642195649, "calibration/confidence_entropy": 0.4635280172759992, "calibration/coverage@0%": 0.073046875, "calibration/coverage@1%": 0.083203125, "calibration/coverage@10%": 0.31015625, "calibration/coverage@15%": 0.38359375, "calibration/coverage@20%": 0.459375, "calibration/coverage@25%": 0.551953125, "calibration/coverage@30%": 0.61015625, "calibration/coverage@5%": 0.213671875, "calibration/ece": 0.13643323395430737, "calibration/mean_confidence": 0.4728889014629608, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 527.0, "completions/max_terminated_length": 527.0, "completions/mean_length": 237.69599609375, "completions/mean_terminated_length": 237.69599609375, "completions/min_length": 129.4, "completions/min_terminated_length": 129.4, "epoch": 0.592, "grad_norm": 0.016083823516964912, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 620974371.0, "reward": 0.929496419429779, "reward_std": 0.07398554235696793, "rewards/accuracy_reward": 0.5287109375, "rewards/brier_reward": 0.816726803779602, "rewards/confidence_uniqueness_reward": 0.9510353088378907, "rewards/format_reward": 1.0, "rewards/frontier_coverage_0": 0.1491527661681175, "rewards/frontier_coverage_1": 0.1491527661681175, "rewards/frontier_coverage_10": 0.1479087233543396, "rewards/frontier_coverage_15": 0.12765701860189438, "rewards/frontier_coverage_20": 0.08184282779693604, "rewards/frontier_coverage_25": 0.06409678980708122, "rewards/frontier_coverage_5": 0.14910189658403397, "rewards/frontier_entropy_batch_reward": -0.24060723185539246, "signal/accuracy_reward/centered_abs_mean": 0.078125, "signal/accuracy_reward/group_std_mean": 0.10625525563955307, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8084635734558105, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0390625, "signal/advantage_abs_mean": 0.7757280707359314, "signal/advantage_pre_scale_abs_mean": 0.05764142274856567, "signal/advantage_pre_scale_std": 0.09220658987760544, "signal/advantage_std": 0.9824289321899414, "signal/brier_reward/centered_abs_mean": 0.09430547803640366, "signal/brier_reward/group_std_mean": 0.12114822864532471, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.196575266122818, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009430548176169395, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013017821311950683, "signal/confidence_uniqueness_reward/group_std_mean": 0.016292367503046988, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0272565308958292, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013017821591347456, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_0/centered_abs_mean": 0.148694708943367, "signal/frontier_coverage_0/group_std_mean": 0.19025928378105164, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04428746402263641, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021263343514874578, "signal/frontier_coverage_1/centered_abs_mean": 0.148694708943367, "signal/frontier_coverage_1/group_std_mean": 0.19025928378105164, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04428746402263641, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021263343514874578, "signal/frontier_coverage_10/centered_abs_mean": 0.14685680270195006, "signal/frontier_coverage_10/group_std_mean": 0.18790920376777648, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.043737325072288516, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002100052172318101, "signal/frontier_coverage_15/centered_abs_mean": 0.12006380707025528, "signal/frontier_coverage_15/group_std_mean": 0.15359488427639006, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03576338440179825, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017169124213978649, "signal/frontier_coverage_20/centered_abs_mean": 0.0711003676056862, "signal/frontier_coverage_20/group_std_mean": 0.09024541974067687, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021219252794981002, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010167352622374893, "signal/frontier_coverage_25/centered_abs_mean": 0.05128743276000023, "signal/frontier_coverage_25/group_std_mean": 0.06477131098508834, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015384691581130028, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007334102760069072, "signal/frontier_coverage_5/centered_abs_mean": 0.14860089123249054, "signal/frontier_coverage_5/group_std_mean": 0.19013891518115997, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04425964131951332, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021249927347525956, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29118287563323975, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36209931373596194, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6089503169059753, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029118288308382034, "step": 185 }, { "calibration/aurc": 0.19273776348515756, "calibration/batch_distribution_entropy": 0.9611420908727529, "calibration/buffer_distribution_entropy": 0.9947319347562583, "calibration/confidence_entropy": 0.46235385435086274, "calibration/coverage@0%": 0.0953125, "calibration/coverage@1%": 0.103515625, "calibration/coverage@10%": 0.419921875, "calibration/coverage@15%": 0.491796875, "calibration/coverage@20%": 0.5875, "calibration/coverage@25%": 0.665625, "calibration/coverage@30%": 0.7375, "calibration/coverage@5%": 0.235546875, "calibration/ece": 0.09930096783332423, "calibration/mean_confidence": 0.4746575482431852, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 708.8, "completions/max_terminated_length": 708.8, "completions/mean_length": 255.9431640625, "completions/mean_terminated_length": 255.99241638183594, "completions/min_length": 81.2, "completions/min_terminated_length": 135.0, "epoch": 0.608, "grad_norm": 0.007871582172811031, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 638594717.0, "reward": 0.9367780327796936, "reward_std": 0.07506957724690437, "rewards/accuracy_reward": 0.53486328125, "rewards/brier_reward": 0.8349022507667542, "rewards/confidence_uniqueness_reward": 0.9513669490814209, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.16537888646125792, "rewards/frontier_coverage_1": 0.16537888646125792, "rewards/frontier_coverage_10": 0.16384746134281158, "rewards/frontier_coverage_15": 0.13749379813671112, "rewards/frontier_coverage_20": 0.08554163128137589, "rewards/frontier_coverage_25": 0.07167729437351227, "rewards/frontier_coverage_5": 0.16531487107276915, "rewards/frontier_entropy_batch_reward": -0.22834107279777527, "signal/accuracy_reward/centered_abs_mean": 0.085858154296875, "signal/accuracy_reward/group_std_mean": 0.11284001320600509, "signal/accuracy_reward/group_zero_std_frac": 0.68125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9049631834030152, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0429290771484375, "signal/advantage_abs_mean": 0.7692124009132385, "signal/advantage_pre_scale_abs_mean": 0.05844322219491005, "signal/advantage_pre_scale_std": 0.09503102004528045, "signal/advantage_std": 0.9823786616325378, "signal/brier_reward/centered_abs_mean": 0.0926524430513382, "signal/brier_reward/group_std_mean": 0.12010594606399536, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19838889837265014, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00926524419337511, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012868665717542171, "signal/confidence_uniqueness_reward/group_std_mean": 0.01655961014330387, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02771872468292713, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001286866539157927, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003949139639735222, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.1529883474111557, "signal/frontier_coverage_0/group_std_mean": 0.19571956098079682, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04671046063303948, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002187733328901231, "signal/frontier_coverage_1/centered_abs_mean": 0.1529883474111557, "signal/frontier_coverage_1/group_std_mean": 0.19571956098079682, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04671046063303948, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002187733328901231, "signal/frontier_coverage_10/centered_abs_mean": 0.1511484533548355, "signal/frontier_coverage_10/group_std_mean": 0.19337283372879027, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04615175053477287, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002161422930657864, "signal/frontier_coverage_15/centered_abs_mean": 0.1199759766459465, "signal/frontier_coverage_15/group_std_mean": 0.15368208587169646, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.036673346906900404, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017156564630568028, "signal/frontier_coverage_20/centered_abs_mean": 0.06901527941226959, "signal/frontier_coverage_20/group_std_mean": 0.08763439208269119, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021173715963959693, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009869184694252908, "signal/frontier_coverage_25/centered_abs_mean": 0.05233650431036949, "signal/frontier_coverage_25/group_std_mean": 0.06636691689491273, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016095756366848946, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007484119967557489, "signal/frontier_coverage_5/centered_abs_mean": 0.15288768708705902, "signal/frontier_coverage_5/group_std_mean": 0.19559282064437866, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.046679823845624926, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021862938767299054, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28050378561019895, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3514762043952942, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6038827538490296, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0280503798276186, "step": 190 }, { "calibration/aurc": 0.20130243987184096, "calibration/batch_distribution_entropy": 0.9757232849051418, "calibration/buffer_distribution_entropy": 0.9946261313671636, "calibration/confidence_entropy": 0.4755939827672188, "calibration/coverage@0%": 0.050785072162426614, "calibration/coverage@1%": 0.07148819716242662, "calibration/coverage@10%": 0.3278650929549902, "calibration/coverage@15%": 0.4200908145792564, "calibration/coverage@20%": 0.5170040362035225, "calibration/coverage@25%": 0.6205632338551859, "calibration/coverage@30%": 0.7198125611545988, "calibration/coverage@5%": 0.1879410775440313, "calibration/ece": 0.10973041707453461, "calibration/mean_confidence": 0.510571645381397, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 741.8, "completions/max_terminated_length": 741.8, "completions/mean_length": 264.2986328125, "completions/mean_terminated_length": 264.4277404785156, "completions/min_length": 26.6, "completions/min_terminated_length": 141.2, "epoch": 0.624, "grad_norm": 0.008098253048956394, "learning_rate": 1e-06, "loss": -0.0045, "num_tokens": 656645039.0, "reward": 0.9362733840942383, "reward_std": 0.08029964119195938, "rewards/accuracy_reward": 0.53525390625, "rewards/brier_reward": 0.8243496537208557, "rewards/confidence_uniqueness_reward": 0.9524309396743774, "rewards/format_reward": 0.9994140625, "rewards/frontier_coverage_0": 0.1459769055247307, "rewards/frontier_coverage_1": 0.1459769055247307, "rewards/frontier_coverage_10": 0.1443769782781601, "rewards/frontier_coverage_15": 0.11724838614463806, "rewards/frontier_coverage_20": 0.07035666406154632, "rewards/frontier_coverage_25": 0.06681257486343384, "rewards/frontier_coverage_5": 0.14591864347457886, "rewards/frontier_entropy_batch_reward": -0.20702989101409913, "signal/accuracy_reward/centered_abs_mean": 0.092120361328125, "signal/accuracy_reward/group_std_mean": 0.11874449849128724, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.922400152683258, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0460601806640625, "signal/advantage_abs_mean": 0.7747326731681824, "signal/advantage_pre_scale_abs_mean": 0.06263215094804764, "signal/advantage_pre_scale_std": 0.10089927017688752, "signal/advantage_std": 0.9825205326080322, "signal/brier_reward/centered_abs_mean": 0.10271832048892975, "signal/brier_reward/group_std_mean": 0.13176209926605226, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20568051934242249, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010271831974387169, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012912808544933796, "signal/confidence_uniqueness_reward/group_std_mean": 0.01798289269208908, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02583295851945877, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012912808684632181, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011222666688263416, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_coverage_0/centered_abs_mean": 0.15500531494617462, "signal/frontier_coverage_0/group_std_mean": 0.1970183253288269, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0443998321890831, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022165759466588495, "signal/frontier_coverage_1/centered_abs_mean": 0.15500531494617462, "signal/frontier_coverage_1/group_std_mean": 0.1970183253288269, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0443998321890831, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022165759466588495, "signal/frontier_coverage_10/centered_abs_mean": 0.1526143193244934, "signal/frontier_coverage_10/group_std_mean": 0.19403861463069916, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.043713013827800754, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021823848597705363, "signal/frontier_coverage_15/centered_abs_mean": 0.11518861204385758, "signal/frontier_coverage_15/group_std_mean": 0.14710773229599, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03298960886895656, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016471970826387405, "signal/frontier_coverage_20/centered_abs_mean": 0.062545096129179, "signal/frontier_coverage_20/group_std_mean": 0.08005284368991852, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017897342145442963, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008943948661908508, "signal/frontier_coverage_25/centered_abs_mean": 0.054150203615427016, "signal/frontier_coverage_25/group_std_mean": 0.0692131370306015, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015493759512901306, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007743479101918638, "signal/frontier_coverage_5/centered_abs_mean": 0.15490354895591735, "signal/frontier_coverage_5/group_std_mean": 0.19688799381256103, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.044370852410793304, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002215120755136013, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2715447604656219, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3448778748512268, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5443657517433167, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027154476940631868, "step": 195 }, { "calibration/aurc": 0.2305115593686886, "calibration/batch_distribution_entropy": 0.9769002181990043, "calibration/buffer_distribution_entropy": 0.994425807362342, "calibration/confidence_entropy": 0.47991060140927677, "calibration/coverage@0%": 0.05351868272994129, "calibration/coverage@1%": 0.0726593077299413, "calibration/coverage@10%": 0.3296951443248532, "calibration/coverage@15%": 0.3894607693248532, "calibration/coverage@20%": 0.4332199425146771, "calibration/coverage@25%": 0.5414307118395303, "calibration/coverage@30%": 0.6391083659491195, "calibration/coverage@5%": 0.2496124327299413, "calibration/ece": 0.1474371272941966, "calibration/mean_confidence": 0.5323626348162074, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 737.4, "completions/max_terminated_length": 737.4, "completions/mean_length": 256.04560546875, "completions/mean_terminated_length": 256.2203735351562, "completions/min_length": 54.4, "completions/min_terminated_length": 136.6, "epoch": 0.64, "grad_norm": 0.007591220550239086, "learning_rate": 1e-06, "loss": -0.0053, "num_tokens": 674609634.0, "reward": 0.958326268196106, "reward_std": 0.07369578629732132, "rewards/accuracy_reward": 0.586328125, "rewards/brier_reward": 0.8264079332351685, "rewards/confidence_uniqueness_reward": 0.9515935182571411, "rewards/format_reward": 0.99931640625, "rewards/frontier_coverage_0": 0.11021586209535598, "rewards/frontier_coverage_1": 0.11021586209535598, "rewards/frontier_coverage_10": 0.1090992659330368, "rewards/frontier_coverage_15": 0.09044042155146599, "rewards/frontier_coverage_20": 0.0605145275592804, "rewards/frontier_coverage_25": 0.06968777850270272, "rewards/frontier_coverage_5": 0.11018936783075332, "rewards/frontier_entropy_batch_reward": -0.21739307940006256, "signal/accuracy_reward/centered_abs_mean": 0.07176513671875, "signal/accuracy_reward/group_std_mean": 0.09551991671323776, "signal/accuracy_reward/group_zero_std_frac": 0.721875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7490824341773987, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.035882568359375, "signal/advantage_abs_mean": 0.7772216916084289, "signal/advantage_pre_scale_abs_mean": 0.057156357914209366, "signal/advantage_pre_scale_std": 0.09424262046813965, "signal/advantage_std": 0.9824276566505432, "signal/brier_reward/centered_abs_mean": 0.09486225694417953, "signal/brier_reward/group_std_mean": 0.12378767281770706, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19762584567070007, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009486225806176663, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013210498169064522, "signal/confidence_uniqueness_reward/group_std_mean": 0.01799871101975441, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027587800472974777, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013210498262196779, "signal/format_reward/centered_abs_mean": 0.001287841796875, "signal/format_reward/group_std_mean": 0.003135160403326154, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012948540598154068, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006439208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.13088381588459014, "signal/frontier_coverage_0/group_std_mean": 0.17006581425666809, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03911969661712646, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018716384656727314, "signal/frontier_coverage_1/centered_abs_mean": 0.13088381588459014, "signal/frontier_coverage_1/group_std_mean": 0.17006581425666809, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03911969661712646, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018716384656727314, "signal/frontier_coverage_10/centered_abs_mean": 0.12875951677560807, "signal/frontier_coverage_10/group_std_mean": 0.16733676493167876, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03848160281777382, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001841261121444404, "signal/frontier_coverage_15/centered_abs_mean": 0.09472260624170303, "signal/frontier_coverage_15/group_std_mean": 0.12351420521736145, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.028285817056894303, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013545332476496696, "signal/frontier_coverage_20/centered_abs_mean": 0.054108986258506776, "signal/frontier_coverage_20/group_std_mean": 0.06981250941753388, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0161893917247653, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000773758499417454, "signal/frontier_coverage_25/centered_abs_mean": 0.05493494421243668, "signal/frontier_coverage_25/group_std_mean": 0.0703204944729805, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016481004655361176, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007855696836486459, "signal/frontier_coverage_5/centered_abs_mean": 0.13078642785549163, "signal/frontier_coverage_5/group_std_mean": 0.16993974149227142, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03909059762954712, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018702458590269088, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2803233087062836, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3516668021678925, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5867632031440735, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02803233154118061, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.4308345787167824, "eval_calibration/batch_distribution_entropy": 0.9444042163645358, "eval_calibration/buffer_distribution_entropy": 0.9942015969240219, "eval_calibration/confidence_entropy": 0.4692993798939642, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.09375, "eval_calibration/coverage@20%": 0.140625, "eval_calibration/coverage@25%": 0.28125, "eval_calibration/coverage@30%": 0.3359375, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.1523365805242153, "eval_calibration/mean_confidence": 0.44097538324252694, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 454.0, "eval_completions/max_terminated_length": 454.0, "eval_completions/mean_length": 252.9209976196289, "eval_completions/mean_terminated_length": 252.9209976196289, "eval_completions/min_length": 151.25, "eval_completions/min_terminated_length": 151.25, "eval_loss": 0.0, "eval_num_tokens": 674609634.0, "eval_reward": 0.8108291625976562, "eval_reward_std": 0.22701482847332954, "eval_rewards/accuracy_reward": 0.451171875, "eval_rewards/brier_reward": 0.8127871453762054, "eval_rewards/confidence_uniqueness_reward": 0.894287109375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.19150054454803467, "eval_rewards/frontier_coverage_1": 0.19150054454803467, "eval_rewards/frontier_coverage_10": 0.18698867037892342, "eval_rewards/frontier_coverage_15": 0.13827473297715187, "eval_rewards/frontier_coverage_20": 0.06924514845013618, "eval_rewards/frontier_coverage_25": 0.04760201275348663, "eval_rewards/frontier_coverage_5": 0.1913793683052063, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 22.9444, "eval_samples_per_second": 21.792, "eval_signal/accuracy_reward/centered_abs_mean": 0.4730224609375, "eval_signal/accuracy_reward/group_std_mean": 0.4935734122991562, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0439613461494446, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23651123046875, "eval_signal/advantage_abs_mean": 0.9394318014383316, "eval_signal/advantage_pre_scale_abs_mean": 0.21379049867391586, "eval_signal/advantage_pre_scale_std": 0.2245732806622982, "eval_signal/advantage_std": 0.9876820743083954, "eval_signal/brier_reward/centered_abs_mean": 0.17406537756323814, "eval_signal/brier_reward/group_std_mean": 0.2270943932235241, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07685280591249466, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01740653719753027, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0407257080078125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.049375214613974094, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017996263224631548, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004072570824064314, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.36413776874542236, "eval_signal/frontier_coverage_0/group_std_mean": 0.432192362844944, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.023011908400803804, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0052071703830733895, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.36413776874542236, "eval_signal/frontier_coverage_1/group_std_mean": 0.432192362844944, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.023011908400803804, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0052071703830733895, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3524700105190277, "eval_signal/frontier_coverage_10/group_std_mean": 0.418954998254776, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.022273984737694263, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005040321149863303, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2525060772895813, "eval_signal/frontier_coverage_15/group_std_mean": 0.30542421340942383, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01595612964592874, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0036108369240537286, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.10488817654550076, "eval_signal/frontier_coverage_20/group_std_mean": 0.133793443441391, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006626064772717655, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014999008853919804, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.07727677188813686, "eval_signal/frontier_coverage_25/group_std_mean": 0.10409998148679733, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004873032798059285, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001105057803215459, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3638596907258034, "eval_signal/frontier_coverage_5/group_std_mean": 0.4318847507238388, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.022994326427578926, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00520319351926446, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.174, "step": 200 }, { "calibration/aurc": 0.41555223691353416, "calibration/batch_distribution_entropy": 0.9703810471803284, "calibration/buffer_distribution_entropy": 0.9944921143370096, "calibration/confidence_entropy": 0.5089773199690188, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.013671875, "calibration/coverage@15%": 0.026953125, "calibration/coverage@20%": 0.10625, "calibration/coverage@25%": 0.233203125, "calibration/coverage@30%": 0.372265625, "calibration/coverage@5%": 0.00234375, "calibration/ece": 0.10794217888237238, "calibration/mean_confidence": 0.4368189563932054, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 752.4, "completions/max_terminated_length": 752.4, "completions/mean_length": 245.75146484375, "completions/mean_terminated_length": 245.77511901855468, "completions/min_length": 101.0, "completions/min_terminated_length": 128.8, "epoch": 0.656, "grad_norm": 0.010888704098761082, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 691982673.0, "reward": 0.9192820072174073, "reward_std": 0.07650423496961593, "rewards/accuracy_reward": 0.50830078125, "rewards/brier_reward": 0.7998928785324096, "rewards/confidence_uniqueness_reward": 0.9526479959487915, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.1341045081615448, "rewards/frontier_coverage_1": 0.1341045081615448, "rewards/frontier_coverage_10": 0.13056059628725053, "rewards/frontier_coverage_15": 0.09860608130693435, "rewards/frontier_coverage_20": 0.05775674730539322, "rewards/frontier_coverage_25": 0.052621806412935256, "rewards/frontier_coverage_5": 0.13404361754655839, "rewards/frontier_entropy_batch_reward": -0.20681337118148804, "signal/accuracy_reward/centered_abs_mean": 0.080706787109375, "signal/accuracy_reward/group_std_mean": 0.11042787879705429, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8209300398826599, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0403533935546875, "signal/advantage_abs_mean": 0.7633422970771789, "signal/advantage_pre_scale_abs_mean": 0.05868617594242096, "signal/advantage_pre_scale_std": 0.09539144784212113, "signal/advantage_std": 0.9824797153472901, "signal/brier_reward/centered_abs_mean": 0.10599584430456162, "signal/brier_reward/group_std_mean": 0.135506734251976, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21632728576660157, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010599584691226483, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011845141276717186, "signal/confidence_uniqueness_reward/group_std_mean": 0.01510525420308113, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024192561581730842, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011845141649246215, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019330549985170364, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1526343673467636, "signal/frontier_coverage_0/group_std_mean": 0.19555696845054626, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04452885463833809, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021826714277267457, "signal/frontier_coverage_1/centered_abs_mean": 0.1526343673467636, "signal/frontier_coverage_1/group_std_mean": 0.19555696845054626, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04452885463833809, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021826714277267457, "signal/frontier_coverage_10/centered_abs_mean": 0.14827150702476502, "signal/frontier_coverage_10/group_std_mean": 0.18989112377166747, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04325413852930069, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002120282547548413, "signal/frontier_coverage_15/centered_abs_mean": 0.10958008021116257, "signal/frontier_coverage_15/group_std_mean": 0.14022946059703828, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03196649923920632, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001566995191387832, "signal/frontier_coverage_20/centered_abs_mean": 0.060041727125644685, "signal/frontier_coverage_20/group_std_mean": 0.07666357308626175, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017537441104650497, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008585967007093132, "signal/frontier_coverage_25/centered_abs_mean": 0.052678339183330536, "signal/frontier_coverage_25/group_std_mean": 0.06774556338787079, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015397872030735015, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007533002295531333, "signal/frontier_coverage_5/centered_abs_mean": 0.15252876728773118, "signal/frontier_coverage_5/group_std_mean": 0.19542471468448638, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04449806213378906, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021811614045873285, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2632203996181488, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3332472801208496, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5378780126571655, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02632203996181488, "step": 205 }, { "calibration/aurc": 0.27446644817007665, "calibration/batch_distribution_entropy": 0.9767794518788089, "calibration/buffer_distribution_entropy": 0.9953144343128398, "calibration/confidence_entropy": 0.46897802364283125, "calibration/coverage@0%": 0.021105216487279845, "calibration/coverage@1%": 0.07462084148727985, "calibration/coverage@10%": 0.21842129403131114, "calibration/coverage@15%": 0.24773039995107632, "calibration/coverage@20%": 0.3294084821428571, "calibration/coverage@25%": 0.44349697284735806, "calibration/coverage@30%": 0.518525256849315, "calibration/coverage@5%": 0.15626146648727984, "calibration/ece": 0.1646093725597769, "calibration/mean_confidence": 0.4924234660935678, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 602.2, "completions/max_terminated_length": 602.2, "completions/mean_length": 240.45693359375, "completions/mean_terminated_length": 240.5041259765625, "completions/min_length": 101.8, "completions/min_terminated_length": 129.8, "epoch": 0.672, "grad_norm": 0.008477822877466679, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 709358392.0, "reward": 0.9301062107086182, "reward_std": 0.07615272402763366, "rewards/accuracy_reward": 0.529296875, "rewards/brier_reward": 0.8106651425361633, "rewards/confidence_uniqueness_reward": 0.950485908985138, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.1491150051355362, "rewards/frontier_coverage_1": 0.1491150051355362, "rewards/frontier_coverage_10": 0.14698787182569503, "rewards/frontier_coverage_15": 0.11903707012534141, "rewards/frontier_coverage_20": 0.07292983531951905, "rewards/frontier_coverage_25": 0.0670611746609211, "rewards/frontier_coverage_5": 0.1490369975566864, "rewards/frontier_entropy_batch_reward": -0.2276163637638092, "signal/accuracy_reward/centered_abs_mean": 0.08685302734375, "signal/accuracy_reward/group_std_mean": 0.11438945978879929, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9162230610847473, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.043426513671875, "signal/advantage_abs_mean": 0.7653620719909668, "signal/advantage_pre_scale_abs_mean": 0.05935278162360191, "signal/advantage_pre_scale_std": 0.09651183784008026, "signal/advantage_std": 0.9823976397514343, "signal/brier_reward/centered_abs_mean": 0.10704858005046844, "signal/brier_reward/group_std_mean": 0.13854455947875977, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22724690437316894, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010704858414828777, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013841424137353897, "signal/confidence_uniqueness_reward/group_std_mean": 0.017753540351986886, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029474389180541037, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001384142437018454, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004257309436798096, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.15937035381793976, "signal/frontier_coverage_0/group_std_mean": 0.20475198030471803, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04841887578368187, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022789960727095603, "signal/frontier_coverage_1/centered_abs_mean": 0.15937035381793976, "signal/frontier_coverage_1/group_std_mean": 0.20475198030471803, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04841887578368187, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022789960727095603, "signal/frontier_coverage_10/centered_abs_mean": 0.15666671991348266, "signal/frontier_coverage_10/group_std_mean": 0.20127066373825073, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.047596973925828935, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022403340321034194, "signal/frontier_coverage_15/centered_abs_mean": 0.11362015008926392, "signal/frontier_coverage_15/group_std_mean": 0.14626134932041168, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03458226881921291, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016247681109234692, "signal/frontier_coverage_20/centered_abs_mean": 0.06491810157895088, "signal/frontier_coverage_20/group_std_mean": 0.08258429169654846, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019771148264408112, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009283288381993771, "signal/frontier_coverage_25/centered_abs_mean": 0.05772334411740303, "signal/frontier_coverage_25/group_std_mean": 0.07298219352960586, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017550046369433404, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008254437940195203, "signal/frontier_coverage_5/centered_abs_mean": 0.1592436820268631, "signal/frontier_coverage_5/group_std_mean": 0.20459164381027223, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04838085174560547, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002277184650301933, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27080374360084536, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34516674280166626, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5760879635810852, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027080375328660013, "step": 210 }, { "calibration/aurc": 0.2857587551102182, "calibration/batch_distribution_entropy": 0.9700236130811619, "calibration/buffer_distribution_entropy": 0.9955469989841135, "calibration/confidence_entropy": 0.5117476351469353, "calibration/coverage@0%": 0.005859375, "calibration/coverage@1%": 0.005859375, "calibration/coverage@10%": 0.157421875, "calibration/coverage@15%": 0.22735600490196078, "calibration/coverage@20%": 0.45704656862745097, "calibration/coverage@25%": 0.5449647671568627, "calibration/coverage@30%": 0.6262408088235294, "calibration/coverage@5%": 0.0859375, "calibration/ece": 0.1445287858422077, "calibration/mean_confidence": 0.48163763612065813, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 560.4, "completions/max_terminated_length": 560.4, "completions/mean_length": 242.11982421875, "completions/mean_terminated_length": 242.16812744140626, "completions/min_length": 102.0, "completions/min_terminated_length": 129.4, "epoch": 0.688, "grad_norm": 0.008597953245043755, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 726791619.0, "reward": 0.9433683037757874, "reward_std": 0.07582886517047882, "rewards/accuracy_reward": 0.55654296875, "rewards/brier_reward": 0.8110973238945007, "rewards/confidence_uniqueness_reward": 0.9522903680801391, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.12071355283260346, "rewards/frontier_coverage_1": 0.12071355283260346, "rewards/frontier_coverage_10": 0.11720674857497215, "rewards/frontier_coverage_15": 0.08717693164944648, "rewards/frontier_coverage_20": 0.057222628593444826, "rewards/frontier_coverage_25": 0.05811392888426781, "rewards/frontier_coverage_5": 0.12064254283905029, "rewards/frontier_entropy_batch_reward": -0.20845062732696534, "signal/accuracy_reward/centered_abs_mean": 0.082000732421875, "signal/accuracy_reward/group_std_mean": 0.1131775364279747, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8328173041343689, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0410003662109375, "signal/advantage_abs_mean": 0.7577017068862915, "signal/advantage_pre_scale_abs_mean": 0.05770004838705063, "signal/advantage_pre_scale_std": 0.09458618760108947, "signal/advantage_std": 0.9824642658233642, "signal/brier_reward/centered_abs_mean": 0.09881015568971634, "signal/brier_reward/group_std_mean": 0.1283961772918701, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2029067099094391, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009881015866994858, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012166624888777733, "signal/confidence_uniqueness_reward/group_std_mean": 0.015853742510080336, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025115343555808068, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001216662465594709, "signal/format_reward/centered_abs_mean": 0.000555419921875, "signal/format_reward/group_std_mean": 0.0013209730386734009, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.005849538929760456, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002777099609375, "signal/frontier_coverage_0/centered_abs_mean": 0.1484951466321945, "signal/frontier_coverage_0/group_std_mean": 0.1917928636074066, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04357870742678642, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002123480476438999, "signal/frontier_coverage_1/centered_abs_mean": 0.1484951466321945, "signal/frontier_coverage_1/group_std_mean": 0.1917928636074066, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04357870742678642, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002123480476438999, "signal/frontier_coverage_10/centered_abs_mean": 0.1446128100156784, "signal/frontier_coverage_10/group_std_mean": 0.18671999275684356, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04244330748915672, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002067963080480695, "signal/frontier_coverage_15/centered_abs_mean": 0.10260212272405625, "signal/frontier_coverage_15/group_std_mean": 0.13325890451669692, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030202661454677582, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014672103570774198, "signal/frontier_coverage_20/centered_abs_mean": 0.058193116635084155, "signal/frontier_coverage_20/group_std_mean": 0.0752414420247078, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017166363447904585, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008321615867316722, "signal/frontier_coverage_25/centered_abs_mean": 0.051832232624292374, "signal/frontier_coverage_25/group_std_mean": 0.06646927148103714, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015283860266208649, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000741200940683484, "signal/frontier_coverage_5/centered_abs_mean": 0.14837707877159118, "signal/frontier_coverage_5/group_std_mean": 0.19164316952228547, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0435446061193943, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021217921981588005, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2712122559547424, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3420302629470825, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5576587617397308, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027121226489543914, "step": 215 }, { "calibration/aurc": 0.20254961526086906, "calibration/batch_distribution_entropy": 0.9618709855860214, "calibration/buffer_distribution_entropy": 0.9959088849568218, "calibration/confidence_entropy": 0.45951236349176466, "calibration/coverage@0%": 0.05703125, "calibration/coverage@1%": 0.069140625, "calibration/coverage@10%": 0.300390625, "calibration/coverage@15%": 0.384375, "calibration/coverage@20%": 0.552734375, "calibration/coverage@25%": 0.644921875, "calibration/coverage@30%": 0.731640625, "calibration/coverage@5%": 0.203125, "calibration/ece": 0.09126460030129198, "calibration/mean_confidence": 0.5245861508927346, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 526.0, "completions/max_terminated_length": 526.0, "completions/mean_length": 237.3544921875, "completions/mean_terminated_length": 237.3544921875, "completions/min_length": 128.4, "completions/min_terminated_length": 128.4, "epoch": 0.704, "grad_norm": 0.007751199882477522, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 744088273.0, "reward": 0.942232632637024, "reward_std": 0.07207171618938446, "rewards/accuracy_reward": 0.54990234375, "rewards/brier_reward": 0.8264536499977112, "rewards/confidence_uniqueness_reward": 0.9524115085601806, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.1335995927453041, "rewards/frontier_coverage_1": 0.1335995927453041, "rewards/frontier_coverage_10": 0.130966417491436, "rewards/frontier_coverage_15": 0.09904391467571258, "rewards/frontier_coverage_20": 0.06477966532111168, "rewards/frontier_coverage_25": 0.06846426129341125, "rewards/frontier_coverage_5": 0.13356347233057023, "rewards/frontier_entropy_batch_reward": -0.21481671035289765, "signal/accuracy_reward/centered_abs_mean": 0.070770263671875, "signal/accuracy_reward/group_std_mean": 0.09581695944070816, "signal/accuracy_reward/group_zero_std_frac": 0.7125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7298851132392883, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0353851318359375, "signal/advantage_abs_mean": 0.7701761364936829, "signal/advantage_pre_scale_abs_mean": 0.055718979239463805, "signal/advantage_pre_scale_std": 0.09126545041799546, "signal/advantage_std": 0.9824565052986145, "signal/brier_reward/centered_abs_mean": 0.0969886377453804, "signal/brier_reward/group_std_mean": 0.12564177215099334, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2006084829568863, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009698864258825778, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011934367194771766, "signal/confidence_uniqueness_reward/group_std_mean": 0.015236479230225086, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024650559201836585, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011934367474168539, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0020299691706895827, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1355642005801201, "signal/frontier_coverage_0/group_std_mean": 0.1746540993452072, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04006961435079574, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019385680090636015, "signal/frontier_coverage_1/centered_abs_mean": 0.1355642005801201, "signal/frontier_coverage_1/group_std_mean": 0.1746540993452072, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04006961435079574, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019385680090636015, "signal/frontier_coverage_10/centered_abs_mean": 0.13053236603736879, "signal/frontier_coverage_10/group_std_mean": 0.16829843819141388, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03858681917190552, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001866612839512527, "signal/frontier_coverage_15/centered_abs_mean": 0.09063916206359864, "signal/frontier_coverage_15/group_std_mean": 0.11711875647306443, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026824329048395157, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012961399741470813, "signal/frontier_coverage_20/centered_abs_mean": 0.05672153383493424, "signal/frontier_coverage_20/group_std_mean": 0.07227804362773896, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01677936241030693, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000811117934063077, "signal/frontier_coverage_25/centered_abs_mean": 0.05697656720876694, "signal/frontier_coverage_25/group_std_mean": 0.07198350727558137, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016841649636626245, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008147649001330137, "signal/frontier_coverage_5/centered_abs_mean": 0.13546179682016374, "signal/frontier_coverage_5/group_std_mean": 0.1745290517807007, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.040039440244436265, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019371037138625979, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2767252385616302, "signal/frontier_entropy_batch_reward/group_std_mean": 0.351086288690567, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5704930663108826, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027672524750232696, "step": 220 }, { "calibration/aurc": 0.23497755005648022, "calibration/batch_distribution_entropy": 0.9809308019215919, "calibration/buffer_distribution_entropy": 0.9957884730343161, "calibration/confidence_entropy": 0.48714494304938716, "calibration/coverage@0%": 0.03984375, "calibration/coverage@1%": 0.03984375, "calibration/coverage@10%": 0.21796875, "calibration/coverage@15%": 0.287109375, "calibration/coverage@20%": 0.480078125, "calibration/coverage@25%": 0.612109375, "calibration/coverage@30%": 0.7046875, "calibration/coverage@5%": 0.166015625, "calibration/ece": 0.14858386498676532, "calibration/mean_confidence": 0.4596200044797, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 515.0, "completions/max_terminated_length": 515.0, "completions/mean_length": 240.1232421875, "completions/mean_terminated_length": 240.14661254882813, "completions/min_length": 103.8, "completions/min_terminated_length": 128.0, "epoch": 0.72, "grad_norm": 0.009818264283239841, "learning_rate": 1e-06, "loss": 0.003, "num_tokens": 761556991.0, "reward": 0.9482641577720642, "reward_std": 0.07391180843114853, "rewards/accuracy_reward": 0.56865234375, "rewards/brier_reward": 0.8173715233802795, "rewards/confidence_uniqueness_reward": 0.9519086122512818, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.11691680550575256, "rewards/frontier_coverage_1": 0.11691680550575256, "rewards/frontier_coverage_10": 0.11170322000980377, "rewards/frontier_coverage_15": 0.08474379926919937, "rewards/frontier_coverage_20": 0.05883038938045502, "rewards/frontier_coverage_25": 0.06428172141313553, "rewards/frontier_coverage_5": 0.11694350391626358, "rewards/frontier_entropy_batch_reward": -0.2252698600292206, "signal/accuracy_reward/centered_abs_mean": 0.079315185546875, "signal/accuracy_reward/group_std_mean": 0.10617940872907639, "signal/accuracy_reward/group_zero_std_frac": 0.6875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8031948566436767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0396575927734375, "signal/advantage_abs_mean": 0.76589115858078, "signal/advantage_pre_scale_abs_mean": 0.057143434882164, "signal/advantage_pre_scale_std": 0.09222103357315063, "signal/advantage_std": 0.9824912071228027, "signal/brier_reward/centered_abs_mean": 0.09368100613355637, "signal/brier_reward/group_std_mean": 0.12197220623493195, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1903090626001358, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009368100762367248, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012216830253601074, "signal/confidence_uniqueness_reward/group_std_mean": 0.015557673759758472, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024774272739887238, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012216830858960749, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002020454406738281, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14588095843791962, "signal/frontier_coverage_0/group_std_mean": 0.1874998241662979, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.042351162433624266, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020860977238044143, "signal/frontier_coverage_1/centered_abs_mean": 0.14588095843791962, "signal/frontier_coverage_1/group_std_mean": 0.1874998241662979, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.042351162433624266, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020860977238044143, "signal/frontier_coverage_10/centered_abs_mean": 0.13653019070625305, "signal/frontier_coverage_10/group_std_mean": 0.1753913700580597, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03963543772697449, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019523817114531993, "signal/frontier_coverage_15/centered_abs_mean": 0.09215132296085357, "signal/frontier_coverage_15/group_std_mean": 0.11820129603147507, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026709938794374465, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013177639339119196, "signal/frontier_coverage_20/centered_abs_mean": 0.0542985163629055, "signal/frontier_coverage_20/group_std_mean": 0.06933169215917587, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015776522643864154, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007764688110910356, "signal/frontier_coverage_25/centered_abs_mean": 0.05071000531315804, "signal/frontier_coverage_25/group_std_mean": 0.06490004062652588, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014756758883595466, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007251530652865768, "signal/frontier_coverage_5/centered_abs_mean": 0.14580158591270448, "signal/frontier_coverage_5/group_std_mean": 0.18739546239376068, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04232985600829124, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020849626045674084, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.28452731370925904, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3587178647518158, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5762077331542969, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02845273055136204, "step": 225 }, { "calibration/aurc": 0.20461066553526025, "calibration/batch_distribution_entropy": 0.9639326995442499, "calibration/buffer_distribution_entropy": 0.996154304026672, "calibration/confidence_entropy": 0.4545212953812185, "calibration/coverage@0%": 0.02265625, "calibration/coverage@1%": 0.02265625, "calibration/coverage@10%": 0.14296875, "calibration/coverage@15%": 0.320703125, "calibration/coverage@20%": 0.5828125, "calibration/coverage@25%": 0.7171875, "calibration/coverage@30%": 0.837109375, "calibration/coverage@5%": 0.083984375, "calibration/ece": 0.1254218466088354, "calibration/mean_confidence": 0.5545885902527345, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 636.4, "completions/max_terminated_length": 636.4, "completions/mean_length": 249.5712890625, "completions/mean_terminated_length": 249.5712890625, "completions/min_length": 138.6, "completions/min_terminated_length": 138.6, "epoch": 0.736, "grad_norm": 0.011151596903800964, "learning_rate": 1e-06, "loss": 0.0023, "num_tokens": 779052185.0, "reward": 0.9520087242126465, "reward_std": 0.07401313185691834, "rewards/accuracy_reward": 0.5802734375, "rewards/brier_reward": 0.8151456475257873, "rewards/confidence_uniqueness_reward": 0.9502784729003906, "rewards/format_reward": 1.0, "rewards/frontier_coverage_0": 0.11596761420369148, "rewards/frontier_coverage_1": 0.11596761420369148, "rewards/frontier_coverage_10": 0.11190555989742279, "rewards/frontier_coverage_15": 0.08655463755130768, "rewards/frontier_coverage_20": 0.0668842189013958, "rewards/frontier_coverage_25": 0.07651213482022286, "rewards/frontier_coverage_5": 0.11595459505915642, "rewards/frontier_entropy_batch_reward": -0.24533769488334656, "signal/accuracy_reward/centered_abs_mean": 0.076953125, "signal/accuracy_reward/group_std_mean": 0.10131202191114426, "signal/accuracy_reward/group_zero_std_frac": 0.709375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.805925703048706, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0384765625, "signal/advantage_abs_mean": 0.7833456873893738, "signal/advantage_pre_scale_abs_mean": 0.0583541601896286, "signal/advantage_pre_scale_std": 0.09414290338754654, "signal/advantage_std": 0.9824194788932801, "signal/brier_reward/centered_abs_mean": 0.10101247578859329, "signal/brier_reward/group_std_mean": 0.12959804385900497, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21122341156005858, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010101247392594815, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013729357719421386, "signal/confidence_uniqueness_reward/group_std_mean": 0.017146859876811506, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028852657228708268, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013729357859119772, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_0/centered_abs_mean": 0.1417178988456726, "signal/frontier_coverage_0/group_std_mean": 0.18116957545280457, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04281155541539192, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002026565885171294, "signal/frontier_coverage_1/centered_abs_mean": 0.1417178988456726, "signal/frontier_coverage_1/group_std_mean": 0.18116957545280457, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04281155541539192, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002026565885171294, "signal/frontier_coverage_10/centered_abs_mean": 0.13176652491092683, "signal/frontier_coverage_10/group_std_mean": 0.1686902552843094, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.039776308834552764, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018842613324522972, "signal/frontier_coverage_15/centered_abs_mean": 0.08698353171348572, "signal/frontier_coverage_15/group_std_mean": 0.11125607341527939, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026212720572948454, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012438645120710135, "signal/frontier_coverage_20/centered_abs_mean": 0.05746523961424828, "signal/frontier_coverage_20/group_std_mean": 0.07287896871566772, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017264867946505547, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008217529277317226, "signal/frontier_coverage_25/centered_abs_mean": 0.058684618771076204, "signal/frontier_coverage_25/group_std_mean": 0.07420662641525269, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017552951723337172, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008391900104470551, "signal/frontier_coverage_5/centered_abs_mean": 0.14159742891788482, "signal/frontier_coverage_5/group_std_mean": 0.18101400136947632, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04277472048997879, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020248432643711566, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2850883662700653, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35867486596107484, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5991427898406982, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0285088375210762, "step": 230 }, { "calibration/aurc": 0.25544721535261844, "calibration/batch_distribution_entropy": 0.9757735884606185, "calibration/buffer_distribution_entropy": 0.996224589318703, "calibration/confidence_entropy": 0.46901248682433605, "calibration/coverage@0%": 0.018359375, "calibration/coverage@1%": 0.018359375, "calibration/coverage@10%": 0.16015625, "calibration/coverage@15%": 0.334375, "calibration/coverage@20%": 0.468359375, "calibration/coverage@25%": 0.575390625, "calibration/coverage@30%": 0.654296875, "calibration/coverage@5%": 0.0671875, "calibration/ece": 0.11607450062335481, "calibration/mean_confidence": 0.48190249004650265, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 610.6, "completions/max_terminated_length": 610.6, "completions/mean_length": 242.105859375, "completions/mean_terminated_length": 242.105859375, "completions/min_length": 134.6, "completions/min_terminated_length": 134.6, "epoch": 0.752, "grad_norm": 0.013876860029995441, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 796758549.0, "reward": 0.9462219715118408, "reward_std": 0.07667578011751175, "rewards/accuracy_reward": 0.56220703125, "rewards/brier_reward": 0.8160680532455444, "rewards/confidence_uniqueness_reward": 0.951573121547699, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.1251913160085678, "rewards/frontier_coverage_1": 0.1251913160085678, "rewards/frontier_coverage_10": 0.1213041938841343, "rewards/frontier_coverage_15": 0.08119001314043998, "rewards/frontier_coverage_20": 0.059100668504834176, "rewards/frontier_coverage_25": 0.06714674010872841, "rewards/frontier_coverage_5": 0.1251828819513321, "rewards/frontier_entropy_batch_reward": -0.2166842043399811, "signal/accuracy_reward/centered_abs_mean": 0.078961181640625, "signal/accuracy_reward/group_std_mean": 0.10518565624952317, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8214090228080749, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0394805908203125, "signal/advantage_abs_mean": 0.7728389739990235, "signal/advantage_pre_scale_abs_mean": 0.059433307498693466, "signal/advantage_pre_scale_std": 0.09831408560276031, "signal/advantage_std": 0.9824405074119568, "signal/brier_reward/centered_abs_mean": 0.09824747443199158, "signal/brier_reward/group_std_mean": 0.12751171737909317, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20607516467571257, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009824748151004314, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012649696692824364, "signal/confidence_uniqueness_reward/group_std_mean": 0.016062306985259057, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02607582099735737, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012649696320295334, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001700025051832199, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13381745666265488, "signal/frontier_coverage_0/group_std_mean": 0.1727729856967926, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.040107411891222, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019135896116495132, "signal/frontier_coverage_1/centered_abs_mean": 0.13381745666265488, "signal/frontier_coverage_1/group_std_mean": 0.1727729856967926, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.040107411891222, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019135896116495132, "signal/frontier_coverage_10/centered_abs_mean": 0.12753710001707078, "signal/frontier_coverage_10/group_std_mean": 0.16487825214862822, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0382267102599144, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001823780476115644, "signal/frontier_coverage_15/centered_abs_mean": 0.08149610757827759, "signal/frontier_coverage_15/group_std_mean": 0.10600130110979081, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024482429772615433, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011653943452984094, "signal/frontier_coverage_20/centered_abs_mean": 0.05388506054878235, "signal/frontier_coverage_20/group_std_mean": 0.06884423345327377, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01612280998378992, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000770556356292218, "signal/frontier_coverage_25/centered_abs_mean": 0.0581989660859108, "signal/frontier_coverage_25/group_std_mean": 0.07418040782213212, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017351610027253626, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008322452078573406, "signal/frontier_coverage_5/centered_abs_mean": 0.1337724283337593, "signal/frontier_coverage_5/group_std_mean": 0.17271509468555452, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04009381532669067, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019129457185044884, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2703861802816391, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3440077781677246, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5595187842845917, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02703861817717552, "step": 235 }, { "calibration/aurc": 0.21897409183336064, "calibration/batch_distribution_entropy": 0.9783713011332612, "calibration/buffer_distribution_entropy": 0.9962282854215292, "calibration/confidence_entropy": 0.5118263385716734, "calibration/coverage@0%": 0.13413879036203522, "calibration/coverage@1%": 0.18923449730919764, "calibration/coverage@10%": 0.32562912793542076, "calibration/coverage@15%": 0.39518025318003913, "calibration/coverage@20%": 0.4823186766144814, "calibration/coverage@25%": 0.5889914077788649, "calibration/coverage@30%": 0.6902007399706458, "calibration/coverage@5%": 0.255272290851272, "calibration/ece": 0.14865977206824074, "calibration/mean_confidence": 0.48145361429484357, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 559.4, "completions/max_terminated_length": 559.4, "completions/mean_length": 248.37353515625, "completions/mean_terminated_length": 248.39793090820314, "completions/min_length": 100.8, "completions/min_terminated_length": 127.6, "epoch": 0.768, "grad_norm": 0.008506418205797672, "learning_rate": 1e-06, "loss": 0.002, "num_tokens": 814234598.0, "reward": 0.9303049683570862, "reward_std": 0.0718627542257309, "rewards/accuracy_reward": 0.5232421875, "rewards/brier_reward": 0.8212167739868164, "rewards/confidence_uniqueness_reward": 0.9525408267974853, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.14440589100122453, "rewards/frontier_coverage_1": 0.14440589100122453, "rewards/frontier_coverage_10": 0.14087323248386383, "rewards/frontier_coverage_15": 0.09991578608751298, "rewards/frontier_coverage_20": 0.05962400585412979, "rewards/frontier_coverage_25": 0.057767481356859204, "rewards/frontier_coverage_5": 0.14436377733945846, "rewards/frontier_entropy_batch_reward": -0.19959425628185273, "signal/accuracy_reward/centered_abs_mean": 0.069140625, "signal/accuracy_reward/group_std_mean": 0.09864080995321274, "signal/accuracy_reward/group_zero_std_frac": 0.696875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7402550339698791, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0345703125, "signal/advantage_abs_mean": 0.7564525723457336, "signal/advantage_pre_scale_abs_mean": 0.05381500422954559, "signal/advantage_pre_scale_std": 0.09075729846954346, "signal/advantage_std": 0.9823861718177795, "signal/brier_reward/centered_abs_mean": 0.09333090484142303, "signal/brier_reward/group_std_mean": 0.12142147421836853, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19983896911144255, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009333090484142303, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01187050472944975, "signal/confidence_uniqueness_reward/group_std_mean": 0.01518601570278406, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025268430635333062, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011870504822582006, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0019358094781637193, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1401743471622467, "signal/frontier_coverage_0/group_std_mean": 0.182624551653862, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04287303537130356, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020044931676238774, "signal/frontier_coverage_1/centered_abs_mean": 0.1401743471622467, "signal/frontier_coverage_1/group_std_mean": 0.182624551653862, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04287303537130356, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020044931676238774, "signal/frontier_coverage_10/centered_abs_mean": 0.13549837470054626, "signal/frontier_coverage_10/group_std_mean": 0.1764853775501251, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04146175310015678, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019376267679035663, "signal/frontier_coverage_15/centered_abs_mean": 0.08775650560855866, "signal/frontier_coverage_15/group_std_mean": 0.1143747478723526, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02687869928777218, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012549180304631592, "signal/frontier_coverage_20/centered_abs_mean": 0.05184945985674858, "signal/frontier_coverage_20/group_std_mean": 0.06713635325431824, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01587141491472721, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007414473104290665, "signal/frontier_coverage_25/centered_abs_mean": 0.04992828816175461, "signal/frontier_coverage_25/group_std_mean": 0.06476413011550904, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015272756479680539, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007139745284803212, "signal/frontier_coverage_5/centered_abs_mean": 0.14012694954872132, "signal/frontier_coverage_5/group_std_mean": 0.18256248235702516, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04285847619175911, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002003815281204879, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2622542232275009, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3334097921848297, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5582247734069824, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026225423067808153, "step": 240 }, { "calibration/aurc": 0.28529336333921895, "calibration/batch_distribution_entropy": 0.9747004335667775, "calibration/buffer_distribution_entropy": 0.9962397116658449, "calibration/confidence_entropy": 0.4703582602948983, "calibration/coverage@0%": 0.014453125, "calibration/coverage@1%": 0.064453125, "calibration/coverage@10%": 0.233203125, "calibration/coverage@15%": 0.277734375, "calibration/coverage@20%": 0.3390625, "calibration/coverage@25%": 0.440234375, "calibration/coverage@30%": 0.570703125, "calibration/coverage@5%": 0.1765625, "calibration/ece": 0.15734434185574941, "calibration/mean_confidence": 0.5264813330703859, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 519.2, "completions/max_terminated_length": 519.2, "completions/mean_length": 245.15859375, "completions/mean_terminated_length": 245.18346557617187, "completions/min_length": 108.4, "completions/min_terminated_length": 135.8, "epoch": 0.784, "grad_norm": 0.00773618882521987, "learning_rate": 1e-06, "loss": 0.0039, "num_tokens": 831919390.0, "reward": 0.9443397045135498, "reward_std": 0.07677196860313415, "rewards/accuracy_reward": 0.56416015625, "rewards/brier_reward": 0.7939793109893799, "rewards/confidence_uniqueness_reward": 0.952772068977356, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.09836947172880173, "rewards/frontier_coverage_1": 0.09836947172880173, "rewards/frontier_coverage_10": 0.0951840840280056, "rewards/frontier_coverage_15": 0.07212830930948258, "rewards/frontier_coverage_20": 0.05310492143034935, "rewards/frontier_coverage_25": 0.06044500917196274, "rewards/frontier_coverage_5": 0.09833909720182418, "rewards/frontier_entropy_batch_reward": -0.2060262978076935, "signal/accuracy_reward/centered_abs_mean": 0.083209228515625, "signal/accuracy_reward/group_std_mean": 0.11050355583429336, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8190797328948974, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0416046142578125, "signal/advantage_abs_mean": 0.7678187370300293, "signal/advantage_pre_scale_abs_mean": 0.05949816852807999, "signal/advantage_pre_scale_std": 0.09607118517160415, "signal/advantage_std": 0.982481837272644, "signal/brier_reward/centered_abs_mean": 0.10488737523555755, "signal/brier_reward/group_std_mean": 0.134664386510849, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21070242822170257, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010488738119602204, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01203925535082817, "signal/confidence_uniqueness_reward/group_std_mean": 0.015293254517018795, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024537032842636107, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012039255816489458, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021954655647277834, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.14875861406326293, "signal/frontier_coverage_0/group_std_mean": 0.18895536065101623, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04271491318941116, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002127248211763799, "signal/frontier_coverage_1/centered_abs_mean": 0.14875861406326293, "signal/frontier_coverage_1/group_std_mean": 0.18895536065101623, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04271491318941116, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002127248211763799, "signal/frontier_coverage_10/centered_abs_mean": 0.14137446880340576, "signal/frontier_coverage_10/group_std_mean": 0.17956486344337463, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04061263874173164, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020216548582538962, "signal/frontier_coverage_15/centered_abs_mean": 0.08850065022706985, "signal/frontier_coverage_15/group_std_mean": 0.11260762810707092, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025534508749842644, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012655592989176511, "signal/frontier_coverage_20/centered_abs_mean": 0.0558601513504982, "signal/frontier_coverage_20/group_std_mean": 0.0709412157535553, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01617070809006691, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007988001452758908, "signal/frontier_coverage_25/centered_abs_mean": 0.056756097823381424, "signal/frontier_coverage_25/group_std_mean": 0.07209322452545167, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01643000766634941, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008116121869534254, "signal/frontier_coverage_5/centered_abs_mean": 0.14871807992458344, "signal/frontier_coverage_5/group_std_mean": 0.1889048457145691, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.042703104019165036, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021266685565933586, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2699484646320343, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3443294107913971, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5487543344497681, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026994846761226654, "step": 245 }, { "calibration/aurc": 0.19084901152374725, "calibration/batch_distribution_entropy": 0.9701281507957082, "calibration/buffer_distribution_entropy": 0.9964064744035049, "calibration/confidence_entropy": 0.46336307930016296, "calibration/coverage@0%": 0.033203125, "calibration/coverage@1%": 0.033203125, "calibration/coverage@10%": 0.47109375, "calibration/coverage@15%": 0.54453125, "calibration/coverage@20%": 0.6015625, "calibration/coverage@25%": 0.66328125, "calibration/coverage@30%": 0.724609375, "calibration/coverage@5%": 0.2921875, "calibration/ece": 0.1200733853568633, "calibration/mean_confidence": 0.47809412117138994, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 538.0, "completions/max_terminated_length": 538.0, "completions/mean_length": 229.1568359375, "completions/mean_terminated_length": 229.1568359375, "completions/min_length": 122.8, "completions/min_terminated_length": 122.8, "epoch": 0.8, "grad_norm": 0.00832080002874136, "learning_rate": 1e-06, "loss": 0.0015, "num_tokens": 849276516.0, "reward": 0.9613359928131103, "reward_std": 0.07218454480171203, "rewards/accuracy_reward": 0.60205078125, "rewards/brier_reward": 0.8215224504470825, "rewards/confidence_uniqueness_reward": 0.9497822642326355, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.10563646852970124, "rewards/frontier_coverage_1": 0.10563646852970124, "rewards/frontier_coverage_10": 0.1023257091641426, "rewards/frontier_coverage_15": 0.07141236141324044, "rewards/frontier_coverage_20": 0.059184766560792926, "rewards/frontier_coverage_25": 0.07559897229075432, "rewards/frontier_coverage_5": 0.10563607960939407, "rewards/frontier_entropy_batch_reward": -0.2571470856666565, "signal/accuracy_reward/centered_abs_mean": 0.070184326171875, "signal/accuracy_reward/group_std_mean": 0.09646072834730149, "signal/accuracy_reward/group_zero_std_frac": 0.7125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7383540868759155, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0350921630859375, "signal/advantage_abs_mean": 0.773518168926239, "signal/advantage_pre_scale_abs_mean": 0.05557697787880898, "signal/advantage_pre_scale_std": 0.09160784184932709, "signal/advantage_std": 0.9823978185653687, "signal/brier_reward/centered_abs_mean": 0.09445693641901017, "signal/brier_reward/group_std_mean": 0.12324505150318146, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19952306747436524, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009445693716406823, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013652579858899117, "signal/confidence_uniqueness_reward/group_std_mean": 0.017295270599424838, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029012787342071533, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013652580324560403, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0020907722413539887, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13039756417274476, "signal/frontier_coverage_0/group_std_mean": 0.1706514060497284, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039599084109067914, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018646851414814592, "signal/frontier_coverage_1/centered_abs_mean": 0.13039756417274476, "signal/frontier_coverage_1/group_std_mean": 0.1706514060497284, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039599084109067914, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018646851414814592, "signal/frontier_coverage_10/centered_abs_mean": 0.12471685260534286, "signal/frontier_coverage_10/group_std_mean": 0.163266384601593, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03788367658853531, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017834509257227183, "signal/frontier_coverage_15/centered_abs_mean": 0.0766264021396637, "signal/frontier_coverage_15/group_std_mean": 0.10058980733156205, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.023319342732429506, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010957575403153896, "signal/frontier_coverage_20/centered_abs_mean": 0.0527132585644722, "signal/frontier_coverage_20/group_std_mean": 0.06767940372228623, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01602230276912451, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000753799604717642, "signal/frontier_coverage_25/centered_abs_mean": 0.05887153521180153, "signal/frontier_coverage_25/group_std_mean": 0.07468887120485306, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0178463090211153, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008418629644438624, "signal/frontier_coverage_5/centered_abs_mean": 0.1303926795721054, "signal/frontier_coverage_5/group_std_mean": 0.17064524292945862, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03959760367870331, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001864615362137556, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29742798805236814, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36837912201881406, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6314390063285827, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029742800071835516, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4398142182921856, "eval_calibration/batch_distribution_entropy": 0.8782582909606457, "eval_calibration/buffer_distribution_entropy": 0.9965439859027538, "eval_calibration/confidence_entropy": 0.4680754710266846, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.09375, "eval_calibration/coverage@15%": 0.109375, "eval_calibration/coverage@20%": 0.1171875, "eval_calibration/coverage@25%": 0.2890625, "eval_calibration/coverage@30%": 0.3671875, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.19118800764913552, "eval_calibration/mean_confidence": 0.4170219948301839, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 350.75, "eval_completions/max_terminated_length": 350.75, "eval_completions/mean_length": 220.90200805664062, "eval_completions/mean_terminated_length": 220.90200805664062, "eval_completions/min_length": 138.25, "eval_completions/min_terminated_length": 138.25, "eval_loss": 0.0, "eval_num_tokens": 849276516.0, "eval_reward": 0.8079598397016525, "eval_reward_std": 0.22688810154795647, "eval_rewards/accuracy_reward": 0.4453125, "eval_rewards/brier_reward": 0.8076187521219254, "eval_rewards/confidence_uniqueness_reward": 0.89892578125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.20175327360630035, "eval_rewards/frontier_coverage_1": 0.20175327360630035, "eval_rewards/frontier_coverage_10": 0.19296763092279434, "eval_rewards/frontier_coverage_15": 0.11704839393496513, "eval_rewards/frontier_coverage_20": 0.06113920174539089, "eval_rewards/frontier_coverage_25": 0.04800493270158768, "eval_rewards/frontier_coverage_5": 0.2017485909163952, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 19.6093, "eval_samples_per_second": 25.498, "eval_signal/accuracy_reward/centered_abs_mean": 0.4775390625, "eval_signal/accuracy_reward/group_std_mean": 0.49622878432273865, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0528854429721832, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23876953125, "eval_signal/advantage_abs_mean": 0.9418710172176361, "eval_signal/advantage_pre_scale_abs_mean": 0.2140355482697487, "eval_signal/advantage_pre_scale_std": 0.22431568056344986, "eval_signal/advantage_std": 0.9876823276281357, "eval_signal/brier_reward/centered_abs_mean": 0.1818385049700737, "eval_signal/brier_reward/group_std_mean": 0.23753474280238152, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08033054694533348, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018183850217610598, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.040863037109375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04884030018001795, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01809265185147524, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004086303699295968, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.38850048184394836, "eval_signal/frontier_coverage_0/group_std_mean": 0.46720458567142487, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02454289235174656, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005555556854233146, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.38850048184394836, "eval_signal/frontier_coverage_1/group_std_mean": 0.46720458567142487, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02454289235174656, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005555556854233146, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.37041959911584854, "eval_signal/frontier_coverage_10/group_std_mean": 0.44689878821372986, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02340186294168234, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005297000170685351, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.21324453875422478, "eval_signal/frontier_coverage_15/group_std_mean": 0.26560650020837784, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013475762913003564, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030493969097733498, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.0958902109414339, "eval_signal/frontier_coverage_20/group_std_mean": 0.12255750596523285, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006055898265913129, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013712299696635455, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.0840714368969202, "eval_signal/frontier_coverage_25/group_std_mean": 0.11348609812557697, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0053028815891593695, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001202221552375704, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3884858936071396, "eval_signal/frontier_coverage_5/group_std_mean": 0.4671877399086952, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.024541971273720264, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00555534812156111, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.204, "step": 250 }, { "calibration/aurc": 0.2088603333490008, "calibration/batch_distribution_entropy": 0.9478072553473524, "calibration/buffer_distribution_entropy": 0.9963203098504536, "calibration/confidence_entropy": 0.4611221206641368, "calibration/coverage@0%": 0.03203125, "calibration/coverage@1%": 0.03203125, "calibration/coverage@10%": 0.235546875, "calibration/coverage@15%": 0.3703125, "calibration/coverage@20%": 0.575, "calibration/coverage@25%": 0.6890625, "calibration/coverage@30%": 0.8125, "calibration/coverage@5%": 0.114453125, "calibration/ece": 0.15271014624473428, "calibration/mean_confidence": 0.4928081139273847, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 408.6, "completions/max_terminated_length": 408.6, "completions/mean_length": 214.7255859375, "completions/mean_terminated_length": 214.7255859375, "completions/min_length": 116.6, "completions/min_terminated_length": 116.6, "epoch": 0.816, "grad_norm": 0.013140806928277016, "learning_rate": 1e-06, "loss": 0.0024, "num_tokens": 866574474.0, "reward": 0.9542932391166687, "reward_std": 0.07651791870594024, "rewards/accuracy_reward": 0.5873046875, "rewards/brier_reward": 0.8041110873222351, "rewards/confidence_uniqueness_reward": 0.9512474060058593, "rewards/format_reward": 1.0, "rewards/frontier_coverage_0": 0.09355135262012482, "rewards/frontier_coverage_1": 0.09355135262012482, "rewards/frontier_coverage_10": 0.09087654128670693, "rewards/frontier_coverage_15": 0.06994750797748565, "rewards/frontier_coverage_20": 0.05408108681440353, "rewards/frontier_coverage_25": 0.06337937340140343, "rewards/frontier_coverage_5": 0.09354995414614678, "rewards/frontier_entropy_batch_reward": -0.2288777768611908, "signal/accuracy_reward/centered_abs_mean": 0.084423828125, "signal/accuracy_reward/group_std_mean": 0.10996274501085282, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.846988070011139, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0422119140625, "signal/advantage_abs_mean": 0.7726068496704102, "signal/advantage_pre_scale_abs_mean": 0.05965398624539375, "signal/advantage_pre_scale_std": 0.09631071537733078, "signal/advantage_std": 0.9825076580047607, "signal/brier_reward/centered_abs_mean": 0.10241206586360932, "signal/brier_reward/group_std_mean": 0.13025801181793212, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20598589181900023, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010241207107901574, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012978291511535645, "signal/confidence_uniqueness_reward/group_std_mean": 0.01621840223670006, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02623649425804615, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012978291837498547, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_0/centered_abs_mean": 0.15378101766109467, "signal/frontier_coverage_0/group_std_mean": 0.19407180547714234, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04420729205012321, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021990684792399405, "signal/frontier_coverage_1/centered_abs_mean": 0.15378101766109467, "signal/frontier_coverage_1/group_std_mean": 0.19407180547714234, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04420729205012321, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021990684792399405, "signal/frontier_coverage_10/centered_abs_mean": 0.1467003881931305, "signal/frontier_coverage_10/group_std_mean": 0.1849285840988159, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0421836256980896, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020978155313059687, "signal/frontier_coverage_15/centered_abs_mean": 0.0903098776936531, "signal/frontier_coverage_15/group_std_mean": 0.11397473961114883, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025973235443234443, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00129143123049289, "signal/frontier_coverage_20/centered_abs_mean": 0.05505901947617531, "signal/frontier_coverage_20/group_std_mean": 0.06974020153284073, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015893121249973774, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007873439695686102, "signal/frontier_coverage_25/centered_abs_mean": 0.05497596263885498, "signal/frontier_coverage_25/group_std_mean": 0.07016591727733612, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015900985337793827, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007861562888137996, "signal/frontier_coverage_5/centered_abs_mean": 0.15375557243824006, "signal/frontier_coverage_5/group_std_mean": 0.19404107630252837, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04419991746544838, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021987047512084246, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2851732075214386, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3577597439289093, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5751234650611877, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02851732075214386, "step": 255 }, { "calibration/aurc": 0.27243795055495346, "calibration/batch_distribution_entropy": 0.9705400726977077, "calibration/buffer_distribution_entropy": 0.9958187429831661, "calibration/confidence_entropy": 0.48087383541988693, "calibration/coverage@0%": 0.073046875, "calibration/coverage@1%": 0.073046875, "calibration/coverage@10%": 0.2484375, "calibration/coverage@15%": 0.3125, "calibration/coverage@20%": 0.36953125, "calibration/coverage@25%": 0.437109375, "calibration/coverage@30%": 0.5390625, "calibration/coverage@5%": 0.185546875, "calibration/ece": 0.12119329880682919, "calibration/mean_confidence": 0.4874164737876063, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 442.8, "completions/max_terminated_length": 442.8, "completions/mean_length": 207.35478515625, "completions/mean_terminated_length": 207.35478515625, "completions/min_length": 112.8, "completions/min_terminated_length": 112.8, "epoch": 0.832, "grad_norm": 0.0085527915507555, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 883706139.0, "reward": 0.9477424025535583, "reward_std": 0.07137095481157303, "rewards/accuracy_reward": 0.56142578125, "rewards/brier_reward": 0.8303583145141602, "rewards/confidence_uniqueness_reward": 0.95125732421875, "rewards/format_reward": 1.0, "rewards/frontier_coverage_0": 0.13446062207221984, "rewards/frontier_coverage_1": 0.13446062207221984, "rewards/frontier_coverage_10": 0.12797623723745347, "rewards/frontier_coverage_15": 0.08604731857776642, "rewards/frontier_coverage_20": 0.06532630696892738, "rewards/frontier_coverage_25": 0.07860753238201142, "rewards/frontier_coverage_5": 0.13444683104753494, "rewards/frontier_entropy_batch_reward": -0.22019013166427612, "signal/accuracy_reward/centered_abs_mean": 0.073223876953125, "signal/accuracy_reward/group_std_mean": 0.1009349599480629, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8066681504249573, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0366119384765625, "signal/advantage_abs_mean": 0.7617290258407593, "signal/advantage_pre_scale_abs_mean": 0.054251715540885925, "signal/advantage_pre_scale_std": 0.09108677059412003, "signal/advantage_std": 0.9823128700256347, "signal/brier_reward/centered_abs_mean": 0.08786282539367676, "signal/brier_reward/group_std_mean": 0.11602227240800858, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19413544237613678, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.008786282502114773, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012720155715942382, "signal/confidence_uniqueness_reward/group_std_mean": 0.015890151262283325, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028258343040943146, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012720155995339156, "signal/format_reward/centered_abs_mean": 0.0, "signal/format_reward/group_std_mean": 0.0, "signal/format_reward/group_zero_std_frac": 1.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0, "signal/frontier_coverage_0/centered_abs_mean": 0.13118936717510224, "signal/frontier_coverage_0/group_std_mean": 0.17081800401210784, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0415608175098896, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018760079983621836, "signal/frontier_coverage_1/centered_abs_mean": 0.13118936717510224, "signal/frontier_coverage_1/group_std_mean": 0.17081800401210784, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0415608175098896, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018760079983621836, "signal/frontier_coverage_10/centered_abs_mean": 0.12370103597640991, "signal/frontier_coverage_10/group_std_mean": 0.16112378239631653, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0391960620880127, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017689247615635395, "signal/frontier_coverage_15/centered_abs_mean": 0.07675774544477462, "signal/frontier_coverage_15/group_std_mean": 0.10008619874715804, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024333661049604417, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010976357152685523, "signal/frontier_coverage_20/centered_abs_mean": 0.051222600787878034, "signal/frontier_coverage_20/group_std_mean": 0.06572640389204025, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01623872797936201, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007324831676669419, "signal/frontier_coverage_25/centered_abs_mean": 0.055126645416021344, "signal/frontier_coverage_25/group_std_mean": 0.07053503394126892, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017435478791594506, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007883110083639622, "signal/frontier_coverage_5/centered_abs_mean": 0.1311732068657875, "signal/frontier_coverage_5/group_std_mean": 0.17079680562019348, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04155569672584534, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001875776913948357, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.268852162361145, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3399739146232605, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5957050085067749, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02688521668314934, "step": 260 }, { "calibration/aurc": 0.2937613337989851, "calibration/batch_distribution_entropy": 0.9661228954350038, "calibration/buffer_distribution_entropy": 0.9959898170050472, "calibration/confidence_entropy": 0.4870354325118241, "calibration/coverage@0%": 0.07890930772994129, "calibration/coverage@1%": 0.0863311827299413, "calibration/coverage@10%": 0.2183624327299413, "calibration/coverage@15%": 0.2832084760273973, "calibration/coverage@20%": 0.44805222602739725, "calibration/coverage@25%": 0.5054756298923679, "calibration/coverage@30%": 0.5488365337573387, "calibration/coverage@5%": 0.12383118272994129, "calibration/ece": 0.16465495699217805, "calibration/mean_confidence": 0.5443247483323702, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 418.8, "completions/max_terminated_length": 418.8, "completions/mean_length": 201.392578125, "completions/mean_terminated_length": 201.4120880126953, "completions/min_length": 92.6, "completions/min_terminated_length": 114.6, "epoch": 0.848, "grad_norm": 0.011671687476336956, "learning_rate": 1e-06, "loss": 0.0009, "num_tokens": 900782767.0, "reward": 0.939740777015686, "reward_std": 0.07238752394914627, "rewards/accuracy_reward": 0.544921875, "rewards/brier_reward": 0.822428572177887, "rewards/confidence_uniqueness_reward": 0.952005398273468, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.13857043534517288, "rewards/frontier_coverage_1": 0.13857043534517288, "rewards/frontier_coverage_10": 0.1353096455335617, "rewards/frontier_coverage_15": 0.08651280254125596, "rewards/frontier_coverage_20": 0.05942459926009178, "rewards/frontier_coverage_25": 0.066201800853014, "rewards/frontier_coverage_5": 0.13855212330818176, "rewards/frontier_entropy_batch_reward": -0.21027669906616211, "signal/accuracy_reward/centered_abs_mean": 0.07392578125, "signal/accuracy_reward/group_std_mean": 0.09971548616886139, "signal/accuracy_reward/group_zero_std_frac": 0.709375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.810302484035492, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.036962890625, "signal/advantage_abs_mean": 0.7670840382575989, "signal/advantage_pre_scale_abs_mean": 0.05565920770168305, "signal/advantage_pre_scale_std": 0.09414431601762771, "signal/advantage_std": 0.9822983503341675, "signal/brier_reward/centered_abs_mean": 0.09511243402957917, "signal/brier_reward/group_std_mean": 0.12226448357105255, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21033987998962403, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009511243738234042, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01212963815778494, "signal/confidence_uniqueness_reward/group_std_mean": 0.015583262778818607, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026886271312832832, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012129638576880097, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002119513228535652, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.13417401611804963, "signal/frontier_coverage_0/group_std_mean": 0.17274006307125092, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.042503001540899275, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001918688416481018, "signal/frontier_coverage_1/centered_abs_mean": 0.13417401611804963, "signal/frontier_coverage_1/group_std_mean": 0.17274006307125092, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.042503001540899275, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001918688416481018, "signal/frontier_coverage_10/centered_abs_mean": 0.13046946972608567, "signal/frontier_coverage_10/group_std_mean": 0.16800358295440673, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04133014753460884, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001865713344886899, "signal/frontier_coverage_15/centered_abs_mean": 0.08114371299743653, "signal/frontier_coverage_15/group_std_mean": 0.10468510389328003, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0257135309278965, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011603550752624868, "signal/frontier_coverage_20/centered_abs_mean": 0.05167923718690872, "signal/frontier_coverage_20/group_std_mean": 0.06598224192857742, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016394700668752194, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007390130776911974, "signal/frontier_coverage_25/centered_abs_mean": 0.054591070115566256, "signal/frontier_coverage_25/group_std_mean": 0.06988776028156281, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017325525730848314, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007806522771716118, "signal/frontier_coverage_5/centered_abs_mean": 0.13415417671203614, "signal/frontier_coverage_5/group_std_mean": 0.1727146774530411, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04249679148197174, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019184047123417258, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2662226051092148, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3375477910041809, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.58951895236969, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026622261106967925, "step": 265 }, { "calibration/aurc": 0.2683589162003546, "calibration/batch_distribution_entropy": 0.9539461017175551, "calibration/buffer_distribution_entropy": 0.996350172832674, "calibration/confidence_entropy": 0.4911902207263893, "calibration/coverage@0%": 0.014453125, "calibration/coverage@1%": 0.014453125, "calibration/coverage@10%": 0.190234375, "calibration/coverage@15%": 0.234765625, "calibration/coverage@20%": 0.275, "calibration/coverage@25%": 0.455859375, "calibration/coverage@30%": 0.701171875, "calibration/coverage@5%": 0.10859375, "calibration/ece": 0.1506179715369549, "calibration/mean_confidence": 0.5922805837100238, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 439.2, "completions/max_terminated_length": 439.2, "completions/mean_length": 210.20791015625, "completions/mean_terminated_length": 210.2282470703125, "completions/min_length": 86.6, "completions/min_terminated_length": 109.8, "epoch": 0.864, "grad_norm": 0.029889076948165894, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 917922112.0, "reward": 0.9645228505134582, "reward_std": 0.0741073101758957, "rewards/accuracy_reward": 0.615234375, "rewards/brier_reward": 0.8081081867218017, "rewards/confidence_uniqueness_reward": 0.9501537799835205, "rewards/format_reward": 0.99990234375, "rewards/frontier_coverage_0": 0.07734835594892502, "rewards/frontier_coverage_1": 0.07734835594892502, "rewards/frontier_coverage_10": 0.07652404010295868, "rewards/frontier_coverage_15": 0.05951971411705017, "rewards/frontier_coverage_20": 0.05017582997679711, "rewards/frontier_coverage_25": 0.06625718101859093, "rewards/frontier_coverage_5": 0.0773268148303032, "rewards/frontier_entropy_batch_reward": -0.258000922203064, "signal/accuracy_reward/centered_abs_mean": 0.07364501953125, "signal/accuracy_reward/group_std_mean": 0.0979606881737709, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7580254793167114, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.036822509765625, "signal/advantage_abs_mean": 0.7813379168510437, "signal/advantage_pre_scale_abs_mean": 0.05818412229418755, "signal/advantage_pre_scale_std": 0.09502710700035095, "signal/advantage_std": 0.9824476718902588, "signal/brier_reward/centered_abs_mean": 0.0971069797873497, "signal/brier_reward/group_std_mean": 0.1244538113474846, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20138957500457763, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009710697643458843, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01293389480561018, "signal/confidence_uniqueness_reward/group_std_mean": 0.016561723686754702, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02688128352165222, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012933894526213408, "signal/format_reward/centered_abs_mean": 0.000189208984375, "signal/format_reward/group_std_mean": 0.0005524271633476019, "signal/format_reward/group_zero_std_frac": 0.996875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0020027007907629014, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 9.46044921875e-05, "signal/frontier_coverage_0/centered_abs_mean": 0.1333679139614105, "signal/frontier_coverage_0/group_std_mean": 0.17199209332466125, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03949750140309334, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019071611808612944, "signal/frontier_coverage_1/centered_abs_mean": 0.1333679139614105, "signal/frontier_coverage_1/group_std_mean": 0.17199209332466125, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03949750140309334, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019071611808612944, "signal/frontier_coverage_10/centered_abs_mean": 0.12856392711400985, "signal/frontier_coverage_10/group_std_mean": 0.16595734059810638, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.038083378970623014, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001838464173488319, "signal/frontier_coverage_15/centered_abs_mean": 0.07582932710647583, "signal/frontier_coverage_15/group_std_mean": 0.09822248071432113, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.022478952631354333, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010843593161553144, "signal/frontier_coverage_20/centered_abs_mean": 0.051568976044654845, "signal/frontier_coverage_20/group_std_mean": 0.06592448204755783, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015313262306153774, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007374363485723734, "signal/frontier_coverage_25/centered_abs_mean": 0.05620872303843498, "signal/frontier_coverage_25/group_std_mean": 0.07158383950591088, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016713694483041764, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008037847350351512, "signal/frontier_coverage_5/centered_abs_mean": 0.13332813382148742, "signal/frontier_coverage_5/group_std_mean": 0.17194126546382904, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03948571756482124, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019065923523157835, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29495537281036377, "signal/frontier_entropy_batch_reward/group_std_mean": 0.36441039443016054, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6126240730285645, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.029495537653565405, "step": 270 }, { "calibration/aurc": 0.336421051457335, "calibration/batch_distribution_entropy": 0.9816314252287958, "calibration/buffer_distribution_entropy": 0.9962709984337563, "calibration/confidence_entropy": 0.45712846813455643, "calibration/coverage@0%": 0.0078125, "calibration/coverage@1%": 0.0078125, "calibration/coverage@10%": 0.073828125, "calibration/coverage@15%": 0.141015625, "calibration/coverage@20%": 0.22578125, "calibration/coverage@25%": 0.300390625, "calibration/coverage@30%": 0.41171875, "calibration/coverage@5%": 0.0140625, "calibration/ece": 0.12420867423276796, "calibration/mean_confidence": 0.48923535681011315, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 564.8, "completions/max_terminated_length": 564.8, "completions/mean_length": 214.70439453125, "completions/mean_terminated_length": 214.70439453125, "completions/min_length": 112.6, "completions/min_terminated_length": 112.6, "epoch": 0.88, "grad_norm": 0.009292150847613811, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 935267757.0, "reward": 0.9303600192070007, "reward_std": 0.07878989428281784, "rewards/accuracy_reward": 0.5265625, "rewards/brier_reward": 0.8139204859733582, "rewards/confidence_uniqueness_reward": 0.9525596857070923, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.14567633271217345, "rewards/frontier_coverage_1": 0.14567633271217345, "rewards/frontier_coverage_10": 0.1417877972126007, "rewards/frontier_coverage_15": 0.08948986679315567, "rewards/frontier_coverage_20": 0.060767459124326705, "rewards/frontier_coverage_25": 0.06566002145409584, "rewards/frontier_coverage_5": 0.1456627994775772, "rewards/frontier_entropy_batch_reward": -0.20836096704006196, "signal/accuracy_reward/centered_abs_mean": 0.0876708984375, "signal/accuracy_reward/group_std_mean": 0.11543703377246857, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9269353866577148, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04383544921875, "signal/advantage_abs_mean": 0.7721322894096374, "signal/advantage_pre_scale_abs_mean": 0.061681386828422544, "signal/advantage_pre_scale_std": 0.10155243873596191, "signal/advantage_std": 0.9824026703834534, "signal/brier_reward/centered_abs_mean": 0.10448898226022721, "signal/brier_reward/group_std_mean": 0.1366585373878479, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22174761891365052, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010448898747563362, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012470530718564988, "signal/confidence_uniqueness_reward/group_std_mean": 0.016169047355651854, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026379484310746194, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012470531044527888, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.003887416422367096, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.15089958012104035, "signal/frontier_coverage_0/group_std_mean": 0.19479792714118957, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.045803869515657424, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002157864021137357, "signal/frontier_coverage_1/centered_abs_mean": 0.15089958012104035, "signal/frontier_coverage_1/group_std_mean": 0.19479792714118957, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.045803869515657424, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002157864021137357, "signal/frontier_coverage_10/centered_abs_mean": 0.14600327014923095, "signal/frontier_coverage_10/group_std_mean": 0.18865067064762114, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04432102143764496, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020878467708826064, "signal/frontier_coverage_15/centered_abs_mean": 0.0892187312245369, "signal/frontier_coverage_15/group_std_mean": 0.11559360474348068, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027101678028702735, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012758278753608465, "signal/frontier_coverage_20/centered_abs_mean": 0.05786134228110314, "signal/frontier_coverage_20/group_std_mean": 0.07404239922761917, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017570355907082558, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.000827417231630534, "signal/frontier_coverage_25/centered_abs_mean": 0.058361977338790894, "signal/frontier_coverage_25/group_std_mean": 0.07477803826332093, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017693167179822923, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008345762616954744, "signal/frontier_coverage_5/centered_abs_mean": 0.1508888840675354, "signal/frontier_coverage_5/group_std_mean": 0.19478428065776826, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.045800501853227614, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021577110514044763, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2678882539272308, "signal/frontier_entropy_batch_reward/group_std_mean": 0.34042693972587584, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5680663108825683, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026788827031850815, "step": 275 }, { "calibration/aurc": 0.30996998547659854, "calibration/batch_distribution_entropy": 0.9863903465512681, "calibration/buffer_distribution_entropy": 0.9963396773180555, "calibration/confidence_entropy": 0.491680642724153, "calibration/coverage@0%": 0.03242263943248532, "calibration/coverage@1%": 0.03242263943248532, "calibration/coverage@10%": 0.1660163894324853, "calibration/coverage@15%": 0.2105476394324853, "calibration/coverage@20%": 0.3489664872798434, "calibration/coverage@25%": 0.4736790606653621, "calibration/coverage@30%": 0.5557447101272015, "calibration/coverage@5%": 0.08164138943248532, "calibration/ece": 0.14309415417650714, "calibration/mean_confidence": 0.4845343095818896, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 592.6, "completions/max_terminated_length": 592.6, "completions/mean_length": 222.80419921875, "completions/mean_terminated_length": 222.82596740722656, "completions/min_length": 91.2, "completions/min_terminated_length": 114.2, "epoch": 0.896, "grad_norm": 0.010289231315255165, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 952660120.0, "reward": 0.9489871025085449, "reward_std": 0.0732444629073143, "rewards/accuracy_reward": 0.56728515625, "rewards/brier_reward": 0.8108451724052429, "rewards/confidence_uniqueness_reward": 0.9530300736427307, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.11231801509857178, "rewards/frontier_coverage_1": 0.11231801509857178, "rewards/frontier_coverage_10": 0.11174041628837586, "rewards/frontier_coverage_15": 0.07842456176877022, "rewards/frontier_coverage_20": 0.05463726818561554, "rewards/frontier_coverage_25": 0.06190679222345352, "rewards/frontier_coverage_5": 0.11231775730848312, "rewards/frontier_entropy_batch_reward": -0.20149709582328795, "signal/accuracy_reward/centered_abs_mean": 0.077850341796875, "signal/accuracy_reward/group_std_mean": 0.10645336210727692, "signal/accuracy_reward/group_zero_std_frac": 0.690625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.816600227355957, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0389251708984375, "signal/advantage_abs_mean": 0.7607104301452636, "signal/advantage_pre_scale_abs_mean": 0.05540677979588508, "signal/advantage_pre_scale_std": 0.09241203665733337, "signal/advantage_std": 0.9824150681495667, "signal/brier_reward/centered_abs_mean": 0.10257258415222167, "signal/brier_reward/group_std_mean": 0.13208626657724382, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21615420579910277, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010257259011268616, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01184554658830166, "signal/confidence_uniqueness_reward/group_std_mean": 0.01544493790715933, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02498168535530567, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0011845546774566173, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0039098581299185755, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.14897378981113435, "signal/frontier_coverage_0/group_std_mean": 0.19240249693393707, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.044983573257923126, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021303252782672645, "signal/frontier_coverage_1/centered_abs_mean": 0.14897378981113435, "signal/frontier_coverage_1/group_std_mean": 0.19240249693393707, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.044983573257923126, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021303252782672645, "signal/frontier_coverage_10/centered_abs_mean": 0.14458298087120056, "signal/frontier_coverage_10/group_std_mean": 0.18663305044174194, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04364226087927818, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020675365580245853, "signal/frontier_coverage_15/centered_abs_mean": 0.09132444709539414, "signal/frontier_coverage_15/group_std_mean": 0.1177283689379692, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027592913806438447, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001305939583107829, "signal/frontier_coverage_20/centered_abs_mean": 0.055938445031642914, "signal/frontier_coverage_20/group_std_mean": 0.07106180787086487, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01688665710389614, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007999197579920292, "signal/frontier_coverage_25/centered_abs_mean": 0.056100095808506015, "signal/frontier_coverage_25/group_std_mean": 0.07118469923734665, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016873976215720177, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008022313704714179, "signal/frontier_coverage_5/centered_abs_mean": 0.1489583134651184, "signal/frontier_coverage_5/group_std_mean": 0.1923828214406967, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0449789248406887, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021301037166267635, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26766058802604675, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3394420027732849, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5642510890960694, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026766058057546616, "step": 280 }, { "calibration/aurc": 0.3400950078940635, "calibration/batch_distribution_entropy": 0.9797264814453449, "calibration/buffer_distribution_entropy": 0.9966567405586122, "calibration/confidence_entropy": 0.48842966793674425, "calibration/coverage@0%": 0.00703660102739726, "calibration/coverage@1%": 0.00703660102739726, "calibration/coverage@10%": 0.043757644324853226, "calibration/coverage@15%": 0.13246774094911937, "calibration/coverage@20%": 0.2891351210861057, "calibration/coverage@25%": 0.3969644386007828, "calibration/coverage@30%": 0.4845240643346379, "calibration/coverage@5%": 0.00703660102739726, "calibration/ece": 0.12737401156946584, "calibration/mean_confidence": 0.5138945779347275, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 513.2, "completions/max_terminated_length": 513.2, "completions/mean_length": 229.2587890625, "completions/mean_terminated_length": 229.32558898925782, "completions/min_length": 95.4, "completions/min_terminated_length": 117.8, "epoch": 0.912, "grad_norm": 0.010692852549254894, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 970059026.0, "reward": 0.9425465822219848, "reward_std": 0.07733960896730423, "rewards/accuracy_reward": 0.555859375, "rewards/brier_reward": 0.8171261787414551, "rewards/confidence_uniqueness_reward": 0.9516117811203003, "rewards/format_reward": 0.999609375, "rewards/frontier_coverage_0": 0.11456998586654663, "rewards/frontier_coverage_1": 0.11456998586654663, "rewards/frontier_coverage_10": 0.11256050020456314, "rewards/frontier_coverage_15": 0.07863751500844955, "rewards/frontier_coverage_20": 0.0569301575422287, "rewards/frontier_coverage_25": 0.06953249722719193, "rewards/frontier_coverage_5": 0.11456334218382835, "rewards/frontier_entropy_batch_reward": -0.21519100069999694, "signal/accuracy_reward/centered_abs_mean": 0.0781494140625, "signal/accuracy_reward/group_std_mean": 0.10967252552509307, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7355853736400604, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03907470703125, "signal/advantage_abs_mean": 0.7514355540275574, "signal/advantage_pre_scale_abs_mean": 0.05810001492500305, "signal/advantage_pre_scale_std": 0.09480322301387786, "signal/advantage_std": 0.9825671911239624, "signal/brier_reward/centered_abs_mean": 0.10597307980060577, "signal/brier_reward/group_std_mean": 0.1378685712814331, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2066969782114029, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010597308166325092, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.012444668635725974, "signal/confidence_uniqueness_reward/group_std_mean": 0.016399627551436424, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024437383562326432, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0012444668915122747, "signal/format_reward/centered_abs_mean": 0.00074462890625, "signal/format_reward/group_std_mean": 0.0018734002020210027, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.008089378848671913, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000372314453125, "signal/frontier_coverage_0/centered_abs_mean": 0.14773554503917694, "signal/frontier_coverage_0/group_std_mean": 0.1906901478767395, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.041051150858402254, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002112618274986744, "signal/frontier_coverage_1/centered_abs_mean": 0.14773554503917694, "signal/frontier_coverage_1/group_std_mean": 0.1906901478767395, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.041051150858402254, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002112618274986744, "signal/frontier_coverage_10/centered_abs_mean": 0.14383466243743898, "signal/frontier_coverage_10/group_std_mean": 0.18568643629550935, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03997599333524704, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020568356849253178, "signal/frontier_coverage_15/centered_abs_mean": 0.09209516048431396, "signal/frontier_coverage_15/group_std_mean": 0.11944440305233002, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025683220103383066, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013169607846066356, "signal/frontier_coverage_20/centered_abs_mean": 0.0584987074136734, "signal/frontier_coverage_20/group_std_mean": 0.07466120570898056, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016389196924865245, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008365315268747508, "signal/frontier_coverage_25/centered_abs_mean": 0.06181689128279686, "signal/frontier_coverage_25/group_std_mean": 0.07882332503795624, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01727975495159626, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008839815389364957, "signal/frontier_coverage_5/centered_abs_mean": 0.147720268368721, "signal/frontier_coverage_5/group_std_mean": 0.1906701147556305, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04104689806699753, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021123998798429967, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2638342171907425, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33859837651252744, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5125030159950257, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026383423060178757, "step": 285 }, { "calibration/aurc": 0.38766553715032764, "calibration/batch_distribution_entropy": 0.981518331491662, "calibration/buffer_distribution_entropy": 0.9970985889687846, "calibration/confidence_entropy": 0.503844832131577, "calibration/coverage@0%": 0.00390625, "calibration/coverage@1%": 0.00390625, "calibration/coverage@10%": 0.0125, "calibration/coverage@15%": 0.039453125, "calibration/coverage@20%": 0.10234375, "calibration/coverage@25%": 0.214453125, "calibration/coverage@30%": 0.271875, "calibration/coverage@5%": 0.00390625, "calibration/ece": 0.10925647180136108, "calibration/mean_confidence": 0.4602850216584913, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 9.765625e-05, "completions/max_length": 554.6, "completions/max_terminated_length": 554.6, "completions/mean_length": 232.66123046875, "completions/mean_terminated_length": 232.6836395263672, "completions/min_length": 91.2, "completions/min_terminated_length": 115.4, "epoch": 0.928, "grad_norm": 0.011063729412853718, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 987468293.0, "reward": 0.9349685192108155, "reward_std": 0.07279682755470276, "rewards/accuracy_reward": 0.54267578125, "rewards/brier_reward": 0.809288215637207, "rewards/confidence_uniqueness_reward": 0.9514593601226806, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.12597917169332504, "rewards/frontier_coverage_1": 0.12597917169332504, "rewards/frontier_coverage_10": 0.12356604933738709, "rewards/frontier_coverage_15": 0.08888857066631317, "rewards/frontier_coverage_20": 0.06169629544019699, "rewards/frontier_coverage_25": 0.06558309346437455, "rewards/frontier_coverage_5": 0.1259745851159096, "rewards/frontier_entropy_batch_reward": -0.2260911613702774, "signal/accuracy_reward/centered_abs_mean": 0.074444580078125, "signal/accuracy_reward/group_std_mean": 0.09707383662462235, "signal/accuracy_reward/group_zero_std_frac": 0.728125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7905593991279602, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0372222900390625, "signal/advantage_abs_mean": 0.7814563870429992, "signal/advantage_pre_scale_abs_mean": 0.05725823864340782, "signal/advantage_pre_scale_std": 0.09308888167142867, "signal/advantage_std": 0.98237384557724, "signal/brier_reward/centered_abs_mean": 0.10359592139720916, "signal/brier_reward/group_std_mean": 0.13340485095977783, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22268273532390595, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01035959217697382, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013044451922178268, "signal/confidence_uniqueness_reward/group_std_mean": 0.016821864247322082, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028180352598428726, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013044452294707297, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00401168242096901, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.14871254861354827, "signal/frontier_coverage_0/group_std_mean": 0.18960267603397368, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04581791833043099, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002126589324325323, "signal/frontier_coverage_1/centered_abs_mean": 0.14871254861354827, "signal/frontier_coverage_1/group_std_mean": 0.18960267603397368, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04581791833043099, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002126589324325323, "signal/frontier_coverage_10/centered_abs_mean": 0.14471837282180786, "signal/frontier_coverage_10/group_std_mean": 0.1845400959253311, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04460237473249436, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002069472731091082, "signal/frontier_coverage_15/centered_abs_mean": 0.09628659933805465, "signal/frontier_coverage_15/group_std_mean": 0.12331466376781464, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029709017276763915, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013768984004855155, "signal/frontier_coverage_20/centered_abs_mean": 0.058573897927999496, "signal/frontier_coverage_20/group_std_mean": 0.07467443645000457, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018021496012806892, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008376067155040801, "signal/frontier_coverage_25/centered_abs_mean": 0.0570153571665287, "signal/frontier_coverage_25/group_std_mean": 0.07228669673204421, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017514837346971034, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008153195609338582, "signal/frontier_coverage_5/centered_abs_mean": 0.14869737327098848, "signal/frontier_coverage_5/group_std_mean": 0.18958347141742707, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04581324979662895, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021263723261654376, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.27987065613269807, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3498177230358124, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6000606775283813, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02798706628382206, "step": 290 }, { "calibration/aurc": 0.23378421040895736, "calibration/batch_distribution_entropy": 0.9588926911239236, "calibration/buffer_distribution_entropy": 0.9973781770111927, "calibration/confidence_entropy": 0.4632707925429629, "calibration/coverage@0%": 0.026171875, "calibration/coverage@1%": 0.051171875, "calibration/coverage@10%": 0.203515625, "calibration/coverage@15%": 0.390625, "calibration/coverage@20%": 0.52109375, "calibration/coverage@25%": 0.619140625, "calibration/coverage@30%": 0.683984375, "calibration/coverage@5%": 0.100390625, "calibration/ece": 0.1241622176784349, "calibration/mean_confidence": 0.44058108322948064, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 568.0, "completions/max_terminated_length": 568.0, "completions/mean_length": 233.37685546875, "completions/mean_terminated_length": 233.42279663085938, "completions/min_length": 93.4, "completions/min_terminated_length": 118.2, "epoch": 0.944, "grad_norm": 0.01295262761414051, "learning_rate": 1e-06, "loss": 0.0006, "num_tokens": 1004833496.0, "reward": 0.9346524477005005, "reward_std": 0.07876813262701035, "rewards/accuracy_reward": 0.54560546875, "rewards/brier_reward": 0.8073648333549499, "rewards/confidence_uniqueness_reward": 0.9499351501464843, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.13496437668800354, "rewards/frontier_coverage_1": 0.13496437668800354, "rewards/frontier_coverage_10": 0.13205459117889404, "rewards/frontier_coverage_15": 0.09704540967941284, "rewards/frontier_coverage_20": 0.0626195065677166, "rewards/frontier_coverage_25": 0.060534913837909696, "rewards/frontier_coverage_5": 0.13495712727308273, "rewards/frontier_entropy_batch_reward": -0.24609753489494324, "signal/accuracy_reward/centered_abs_mean": 0.095135498046875, "signal/accuracy_reward/group_std_mean": 0.12562316060066223, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9591840744018555, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0475677490234375, "signal/advantage_abs_mean": 0.7657712578773499, "signal/advantage_pre_scale_abs_mean": 0.06128266230225563, "signal/advantage_pre_scale_std": 0.09885531663894653, "signal/advantage_std": 0.9824792623519898, "signal/brier_reward/centered_abs_mean": 0.10642676800489426, "signal/brier_reward/group_std_mean": 0.13670923560857773, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21600624322891235, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010642676800489425, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013227501884102821, "signal/confidence_uniqueness_reward/group_std_mean": 0.017046448588371278, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027110712230205537, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013227502349764109, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0038907095789909364, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.17288099527359008, "signal/frontier_coverage_0/group_std_mean": 0.22045514285564421, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.05001463890075684, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002472198219038546, "signal/frontier_coverage_1/centered_abs_mean": 0.17288099527359008, "signal/frontier_coverage_1/group_std_mean": 0.22045514285564421, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.05001463890075684, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002472198219038546, "signal/frontier_coverage_10/centered_abs_mean": 0.16872143149375915, "signal/frontier_coverage_10/group_std_mean": 0.21528524458408355, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.048827193677425385, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024127164389938114, "signal/frontier_coverage_15/centered_abs_mean": 0.11024031639099122, "signal/frontier_coverage_15/group_std_mean": 0.14126538336277009, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03192974366247654, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001576436497271061, "signal/frontier_coverage_20/centered_abs_mean": 0.06356698796153068, "signal/frontier_coverage_20/group_std_mean": 0.08100138902664185, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01849036365747452, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009090078994631768, "signal/frontier_coverage_25/centered_abs_mean": 0.054581372439861296, "signal/frontier_coverage_25/group_std_mean": 0.06919515281915664, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01598366592079401, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007805136265233159, "signal/frontier_coverage_5/centered_abs_mean": 0.17286439538002013, "signal/frontier_coverage_5/group_std_mean": 0.22043438553810119, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.05000990778207779, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024719608947634695, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.278346860408783, "signal/frontier_entropy_batch_reward/group_std_mean": 0.35178478956222536, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5688707113265992, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.027834687754511832, "step": 295 }, { "calibration/aurc": 0.31441186503547164, "calibration/batch_distribution_entropy": 0.9633186459663221, "calibration/buffer_distribution_entropy": 0.9972034755573844, "calibration/confidence_entropy": 0.449966523396641, "calibration/coverage@0%": 0.01328125, "calibration/coverage@1%": 0.01328125, "calibration/coverage@10%": 0.119921875, "calibration/coverage@15%": 0.2625, "calibration/coverage@20%": 0.3, "calibration/coverage@25%": 0.37109375, "calibration/coverage@30%": 0.51015625, "calibration/coverage@5%": 0.0390625, "calibration/ece": 0.1385596621596186, "calibration/mean_confidence": 0.5507708088766715, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 670.2, "completions/max_terminated_length": 670.2, "completions/mean_length": 231.98203125, "completions/mean_terminated_length": 232.07284545898438, "completions/min_length": 68.2, "completions/min_terminated_length": 114.8, "epoch": 0.96, "grad_norm": 0.008244195021688938, "learning_rate": 1e-06, "loss": -0.0072, "num_tokens": 1022149312.0, "reward": 0.9294110298156738, "reward_std": 0.0755992129445076, "rewards/accuracy_reward": 0.532421875, "rewards/brier_reward": 0.8261340737342835, "rewards/confidence_uniqueness_reward": 0.9491169810295105, "rewards/format_reward": 0.9994140625, "rewards/frontier_coverage_0": 0.15588035583496093, "rewards/frontier_coverage_1": 0.15588035583496093, "rewards/frontier_coverage_10": 0.15350482165813445, "rewards/frontier_coverage_15": 0.10767751783132554, "rewards/frontier_coverage_20": 0.07077482268214226, "rewards/frontier_coverage_25": 0.07669939547777176, "rewards/frontier_coverage_5": 0.15587256848812103, "rewards/frontier_entropy_batch_reward": -0.26562986969947816, "signal/accuracy_reward/centered_abs_mean": 0.07545166015625, "signal/accuracy_reward/group_std_mean": 0.10325214564800263, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7740512013435363, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.037725830078125, "signal/advantage_abs_mean": 0.7682592153549195, "signal/advantage_pre_scale_abs_mean": 0.05740421935915947, "signal/advantage_pre_scale_std": 0.09488718807697297, "signal/advantage_std": 0.982464599609375, "signal/brier_reward/centered_abs_mean": 0.09947880506515502, "signal/brier_reward/group_std_mean": 0.1302649974822998, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20439462959766388, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009947880543768406, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014108524098992348, "signal/confidence_uniqueness_reward/group_std_mean": 0.019470517709851264, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029055507853627205, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014108523493632675, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629800856113, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.011646222323179245, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_coverage_0/centered_abs_mean": 0.142266646027565, "signal/frontier_coverage_0/group_std_mean": 0.18246809244155884, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04174907729029655, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020344130927696822, "signal/frontier_coverage_1/centered_abs_mean": 0.142266646027565, "signal/frontier_coverage_1/group_std_mean": 0.18246809244155884, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04174907729029655, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020344130927696822, "signal/frontier_coverage_10/centered_abs_mean": 0.14002286195755004, "signal/frontier_coverage_10/group_std_mean": 0.17962463200092316, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.041089994460344316, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002002326911315322, "signal/frontier_coverage_15/centered_abs_mean": 0.09163234382867813, "signal/frontier_coverage_15/group_std_mean": 0.11807395815849304, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026894450187683105, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00131034255027771, "signal/frontier_coverage_20/centered_abs_mean": 0.05888952389359474, "signal/frontier_coverage_20/group_std_mean": 0.07460112124681473, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01728636063635349, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008421201724559069, "signal/frontier_coverage_25/centered_abs_mean": 0.060860900580883025, "signal/frontier_coverage_25/group_std_mean": 0.07711833715438843, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017893003672361373, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008703108527697623, "signal/frontier_coverage_5/centered_abs_mean": 0.14225718677043914, "signal/frontier_coverage_5/group_std_mean": 0.1824559450149536, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04174629151821137, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020342777483165265, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.29106062054634096, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3644901514053345, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5973328590393067, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02910606376826763, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.4415723707304741, "eval_calibration/batch_distribution_entropy": 0.9349213247980521, "eval_calibration/buffer_distribution_entropy": 0.9966973513521955, "eval_calibration/confidence_entropy": 0.47848555053430986, "eval_calibration/coverage@0%": 0.0859375, "eval_calibration/coverage@1%": 0.0859375, "eval_calibration/coverage@10%": 0.0859375, "eval_calibration/coverage@15%": 0.109375, "eval_calibration/coverage@20%": 0.1484375, "eval_calibration/coverage@25%": 0.1796875, "eval_calibration/coverage@30%": 0.2109375, "eval_calibration/coverage@5%": 0.0859375, "eval_calibration/ece": 0.1948771309265625, "eval_calibration/mean_confidence": 0.45367350186093747, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 457.0, "eval_completions/max_terminated_length": 457.0, "eval_completions/mean_length": 241.4718475341797, "eval_completions/mean_terminated_length": 241.4718475341797, "eval_completions/min_length": 144.25, "eval_completions/min_terminated_length": 144.25, "eval_loss": 0.0, "eval_num_tokens": 1022149312.0, "eval_reward": 0.8029894828796387, "eval_reward_std": 0.2307339571416378, "eval_rewards/accuracy_reward": 0.435546875, "eval_rewards/brier_reward": 0.8061055541038513, "eval_rewards/confidence_uniqueness_reward": 0.8974609375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.1957620456814766, "eval_rewards/frontier_coverage_1": 0.1957620456814766, "eval_rewards/frontier_coverage_10": 0.19467196241021156, "eval_rewards/frontier_coverage_15": 0.13251663371920586, "eval_rewards/frontier_coverage_20": 0.06984788924455643, "eval_rewards/frontier_coverage_25": 0.05480411183089018, "eval_rewards/frontier_coverage_5": 0.1957547478377819, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 23.0845, "eval_samples_per_second": 21.66, "eval_signal/accuracy_reward/centered_abs_mean": 0.4744873046875, "eval_signal/accuracy_reward/group_std_mean": 0.4946645200252533, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0288754552602768, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23724365234375, "eval_signal/advantage_abs_mean": 0.939881756901741, "eval_signal/advantage_pre_scale_abs_mean": 0.21714717894792557, "eval_signal/advantage_pre_scale_std": 0.2281285598874092, "eval_signal/advantage_std": 0.9876896291971207, "eval_signal/brier_reward/centered_abs_mean": 0.18803402036428452, "eval_signal/brier_reward/group_std_mean": 0.23641518875956535, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08167162910103798, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018803401850163937, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0414581298828125, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04806934855878353, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01800214545801282, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004145812941715121, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.36271509528160095, "eval_signal/frontier_coverage_0/group_std_mean": 0.4364357739686966, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02251249784603715, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005186826107092202, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.36271509528160095, "eval_signal/frontier_coverage_1/group_std_mean": 0.4364357739686966, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02251249784603715, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005186826107092202, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.36009519547224045, "eval_signal/frontier_coverage_10/group_std_mean": 0.4333682507276535, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.022349967621266842, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005149361444637179, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2319328859448433, "eval_signal/frontier_coverage_15/group_std_mean": 0.28401825577020645, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01439695293083787, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003316640213597566, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.10108834877610207, "eval_signal/frontier_coverage_20/group_std_mean": 0.12704241834580898, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006276872823946178, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014455633936449885, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.09253444895148277, "eval_signal/frontier_coverage_25/group_std_mean": 0.1211695522069931, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00573564157821238, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013232426135800779, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3626917079091072, "eval_signal/frontier_coverage_5/group_std_mean": 0.43640825897455215, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.022511047311127186, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005186491413041949, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.173, "step": 300 }, { "calibration/aurc": 0.2298664176915633, "calibration/batch_distribution_entropy": 0.9806768056698789, "calibration/buffer_distribution_entropy": 0.9966620218604406, "calibration/confidence_entropy": 0.5069850261975526, "calibration/coverage@0%": 0.07621101424361493, "calibration/coverage@1%": 0.10316413924361494, "calibration/coverage@10%": 0.327171844302554, "calibration/coverage@15%": 0.4347433693516699, "calibration/coverage@20%": 0.5016047089882122, "calibration/coverage@25%": 0.571963255157171, "calibration/coverage@30%": 0.6295002455795677, "calibration/coverage@5%": 0.20215572814341848, "calibration/ece": 0.1638740621100828, "calibration/mean_confidence": 0.4818879265565929, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 609.2, "completions/max_terminated_length": 609.2, "completions/mean_length": 246.87958984375, "completions/mean_terminated_length": 247.02230834960938, "completions/min_length": 81.2, "completions/min_terminated_length": 130.4, "epoch": 0.976, "grad_norm": 0.013785382732748985, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 1039538479.0, "reward": 0.9428975343704223, "reward_std": 0.07416855841875077, "rewards/accuracy_reward": 0.56025390625, "rewards/brier_reward": 0.8010736227035522, "rewards/confidence_uniqueness_reward": 0.9515250086784363, "rewards/format_reward": 0.9994140625, "rewards/frontier_coverage_0": 0.10944837052375078, "rewards/frontier_coverage_1": 0.10944837052375078, "rewards/frontier_coverage_10": 0.10874446658417583, "rewards/frontier_coverage_15": 0.08354733660817146, "rewards/frontier_coverage_20": 0.05517433062195778, "rewards/frontier_coverage_25": 0.058141480386257174, "rewards/frontier_coverage_5": 0.10943909073248506, "rewards/frontier_entropy_batch_reward": -0.2126171350479126, "signal/accuracy_reward/centered_abs_mean": 0.076971435546875, "signal/accuracy_reward/group_std_mean": 0.11019863039255143, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7614342093467712, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0384857177734375, "signal/advantage_abs_mean": 0.7454835653305054, "signal/advantage_pre_scale_abs_mean": 0.054844215512275696, "signal/advantage_pre_scale_std": 0.09041195660829544, "signal/advantage_std": 0.9824780821800232, "signal/brier_reward/centered_abs_mean": 0.09713428020477295, "signal/brier_reward/group_std_mean": 0.12663674652576445, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1961934447288513, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009713428001850844, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01313753817230463, "signal/confidence_uniqueness_reward/group_std_mean": 0.0176511786878109, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026738233864307404, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013137538451701402, "signal/format_reward/centered_abs_mean": 0.00111083984375, "signal/format_reward/group_std_mean": 0.0026419460773468018, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010505561530590058, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000555419921875, "signal/frontier_coverage_0/centered_abs_mean": 0.1482255771756172, "signal/frontier_coverage_0/group_std_mean": 0.1937095880508423, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0427293211221695, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021196257323026655, "signal/frontier_coverage_1/centered_abs_mean": 0.1482255771756172, "signal/frontier_coverage_1/group_std_mean": 0.1937095880508423, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0427293211221695, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021196257323026655, "signal/frontier_coverage_10/centered_abs_mean": 0.14717507511377334, "signal/frontier_coverage_10/group_std_mean": 0.19235173761844634, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04242658242583275, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021046036155894397, "signal/frontier_coverage_15/centered_abs_mean": 0.09856034517288208, "signal/frontier_coverage_15/group_std_mean": 0.12866656184196473, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.028408873453736307, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014094128971919417, "signal/frontier_coverage_20/centered_abs_mean": 0.05635328218340874, "signal/frontier_coverage_20/group_std_mean": 0.07260994017124175, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016331818141043185, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008058519102633, "signal/frontier_coverage_25/centered_abs_mean": 0.05241282656788826, "signal/frontier_coverage_25/group_std_mean": 0.06700127571821213, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015302561409771442, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007495034369640052, "signal/frontier_coverage_5/centered_abs_mean": 0.148216313123703, "signal/frontier_coverage_5/group_std_mean": 0.1936979979276657, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04272666648030281, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021194932982325555, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.26816104650497435, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3424443662166595, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5510828495025635, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.026816104725003242, "step": 305 }, { "calibration/aurc": 0.33477025169694324, "calibration/batch_distribution_entropy": 0.9750517619387263, "calibration/buffer_distribution_entropy": 0.9969642801765097, "calibration/confidence_entropy": 0.49620530918619676, "calibration/coverage@0%": 0.02970992284832691, "calibration/coverage@1%": 0.02970992284832691, "calibration/coverage@10%": 0.11534012098922711, "calibration/coverage@15%": 0.16945480221028464, "calibration/coverage@20%": 0.2176473629047102, "calibration/coverage@25%": 0.3736974780121257, "calibration/coverage@30%": 0.4831432959814198, "calibration/coverage@5%": 0.04533492284832691, "calibration/ece": 0.13145712083874353, "calibration/mean_confidence": 0.4428615850637298, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 747.2, "completions/max_terminated_length": 747.2, "completions/mean_length": 256.34677734375, "completions/mean_terminated_length": 256.6212921142578, "completions/min_length": 47.0, "completions/min_terminated_length": 127.6, "epoch": 0.992, "grad_norm": 0.0102123087272048, "learning_rate": 1e-06, "loss": 0.0046, "num_tokens": 1057291950.0, "reward": 0.9348322153091431, "reward_std": 0.07765627354383468, "rewards/accuracy_reward": 0.54091796875, "rewards/brier_reward": 0.8109241127967834, "rewards/confidence_uniqueness_reward": 0.9507102489471435, "rewards/format_reward": 0.998828125, "rewards/frontier_coverage_0": 0.133108651638031, "rewards/frontier_coverage_1": 0.133108651638031, "rewards/frontier_coverage_10": 0.13217684626579285, "rewards/frontier_coverage_15": 0.09742676615715026, "rewards/frontier_coverage_20": 0.0603479154407978, "rewards/frontier_coverage_25": 0.061624595522880556, "rewards/frontier_coverage_5": 0.1331046998500824, "rewards/frontier_entropy_batch_reward": -0.2194212406873703, "signal/accuracy_reward/centered_abs_mean": 0.084979248046875, "signal/accuracy_reward/group_std_mean": 0.11238697618246078, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8460070013999939, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0424896240234375, "signal/advantage_abs_mean": 0.763712465763092, "signal/advantage_pre_scale_abs_mean": 0.05977008268237114, "signal/advantage_pre_scale_std": 0.09724251925945282, "signal/advantage_std": 0.9824697017669678, "signal/brier_reward/centered_abs_mean": 0.09974109381437302, "signal/brier_reward/group_std_mean": 0.12734393328428267, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20343088805675508, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009974109753966332, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014469091221690178, "signal/confidence_uniqueness_reward/group_std_mean": 0.019127808138728143, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029598025232553483, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014469092013314367, "signal/format_reward/centered_abs_mean": 0.002001953125, "signal/format_reward/group_std_mean": 0.0037383693270385265, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02066621519625187, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010009765625, "signal/frontier_coverage_0/centered_abs_mean": 0.16110625863075256, "signal/frontier_coverage_0/group_std_mean": 0.2007138967514038, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04667669981718063, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023038194980472327, "signal/frontier_coverage_1/centered_abs_mean": 0.16110625863075256, "signal/frontier_coverage_1/group_std_mean": 0.2007138967514038, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04667669981718063, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023038194980472327, "signal/frontier_coverage_10/centered_abs_mean": 0.15938453674316405, "signal/frontier_coverage_10/group_std_mean": 0.1985825330018997, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04617907330393791, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002279198868200183, "signal/frontier_coverage_15/centered_abs_mean": 0.10327828973531723, "signal/frontier_coverage_15/group_std_mean": 0.12951961457729338, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02991574816405773, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014768795343115925, "signal/frontier_coverage_20/centered_abs_mean": 0.05653135553002357, "signal/frontier_coverage_20/group_std_mean": 0.0712839536368847, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016467047110199928, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008083983790129423, "signal/frontier_coverage_25/centered_abs_mean": 0.05165333226323128, "signal/frontier_coverage_25/group_std_mean": 0.06579789370298386, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015181095898151397, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007386426557786762, "signal/frontier_coverage_5/centered_abs_mean": 0.16109590530395507, "signal/frontier_coverage_5/group_std_mean": 0.20070102512836457, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.046673715114593506, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023036715108901264, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.2753097414970398, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3474380552768707, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5634862422943115, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.02753097340464592, "step": 310 }, { "calibration/aurc": 0.2724536165131359, "calibration/batch_distribution_entropy": 0.9669139741272306, "calibration/buffer_distribution_entropy": 0.9970807525569785, "calibration/confidence_entropy": 0.45874800822237394, "calibration/coverage@0%": 0.0029296875, "calibration/coverage@1%": 0.0029296875, "calibration/coverage@10%": 0.0029296875, "calibration/coverage@15%": 0.0087890625, "calibration/coverage@20%": 0.23828125, "calibration/coverage@25%": 0.5478515625, "calibration/coverage@30%": 0.736328125, "calibration/coverage@5%": 0.0029296875, "calibration/ece": 0.12426718671766204, "calibration/mean_confidence": 0.5752325127324063, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 593.5, "completions/max_terminated_length": 593.5, "completions/mean_length": 256.4180374145508, "completions/mean_terminated_length": 256.5437545776367, "completions/min_length": 58.0, "completions/min_terminated_length": 128.0, "epoch": 0.9984, "num_tokens": 1064347934.0, "reward": 0.9560622274875641, "reward_std": 0.0776829868555069, "rewards/accuracy_reward": 0.59228515625, "rewards/brier_reward": 0.7798943519592285, "rewards/confidence_uniqueness_reward": 0.9535358250141144, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.0610650721937418, "rewards/frontier_coverage_1": 0.0610650721937418, "rewards/frontier_coverage_10": 0.061735767871141434, "rewards/frontier_coverage_15": 0.04953071102499962, "rewards/frontier_coverage_20": 0.04053525626659393, "rewards/frontier_coverage_25": 0.05742606520652771, "rewards/frontier_coverage_5": 0.06107356771826744, "rewards/frontier_entropy_batch_reward": -0.18790987133979797, "signal/accuracy_reward/centered_abs_mean": 0.079864501953125, "signal/accuracy_reward/group_std_mean": 0.11184961348772049, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8313649296760559, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0399322509765625, "signal/advantage_abs_mean": 0.7595762014389038, "signal/advantage_pre_scale_abs_mean": 0.05862266570329666, "signal/advantage_pre_scale_std": 0.09895920753479004, "signal/advantage_std": 0.9824418723583221, "signal/brier_reward/centered_abs_mean": 0.10770522058010101, "signal/brier_reward/group_std_mean": 0.13640563189983368, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2242312952876091, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010770522058010101, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.011800288688391447, "signal/confidence_uniqueness_reward/group_std_mean": 0.016339605674147606, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024581880308687687, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001180028892122209, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.009899882599711418, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_coverage_0/centered_abs_mean": 0.14607372134923935, "signal/frontier_coverage_0/group_std_mean": 0.18750649690628052, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.043485309928655624, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020888540893793106, "signal/frontier_coverage_1/centered_abs_mean": 0.14607372134923935, "signal/frontier_coverage_1/group_std_mean": 0.18750649690628052, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.043485309928655624, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020888540893793106, "signal/frontier_coverage_10/centered_abs_mean": 0.14458149671554565, "signal/frontier_coverage_10/group_std_mean": 0.18554429709911346, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.04304126277565956, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002067515510134399, "signal/frontier_coverage_15/centered_abs_mean": 0.09289034456014633, "signal/frontier_coverage_15/group_std_mean": 0.11935219541192055, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027649453841149807, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001328331942204386, "signal/frontier_coverage_20/centered_abs_mean": 0.05580424703657627, "signal/frontier_coverage_20/group_std_mean": 0.07091843336820602, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016613470390439034, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0007980007212609053, "signal/frontier_coverage_25/centered_abs_mean": 0.05803663656115532, "signal/frontier_coverage_25/group_std_mean": 0.07403568923473358, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017281348817050457, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008299238979816437, "signal/frontier_coverage_5/centered_abs_mean": 0.14606471359729767, "signal/frontier_coverage_5/group_std_mean": 0.18749448657035828, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04348263330757618, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002088725450448692, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25206458568573, "signal/frontier_entropy_batch_reward/group_std_mean": 0.326417937874794, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5250041484832764, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025206458754837513, "step": 312, "total_flos": 0.0, "train_loss": -0.0004958666193907937, "train_runtime": 60044.1605, "train_samples_per_second": 0.333, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1064347934, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }