{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 50, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.6291125962699202, "calibration/batch_distribution_entropy": 0.6579274024264119, "calibration/buffer_distribution_entropy": 0.6317432625270244, "calibration/confidence_entropy": 0.3463624208288166, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.49361065763556233, "calibration/mean_confidence": 0.7892375473378748, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03330078125, "completions/max_length": 1506.2, "completions/max_terminated_length": 1506.2, "completions/mean_length": 217.56533203125, "completions/mean_terminated_length": 225.04311828613282, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.016, "grad_norm": 0.03844146803021431, "learning_rate": 3.1249999999999997e-07, "loss": 0.0107, "num_tokens": 17071901.0, "reward": 0.5538008451461792, "reward_std": 0.4007190465927124, "rewards/accuracy_reward": 0.2220703125, "rewards/brier_reward": 0.37496077418327334, "rewards/confidence_uniqueness_reward": 0.48197067975997926, "rewards/format_reward": 0.684375, "rewards/frontier_coverage_0": 0.14870173409581183, "rewards/frontier_coverage_1": 0.14870173409581183, "rewards/frontier_coverage_10": 0.14870173409581183, "rewards/frontier_coverage_15": 0.14870173409581183, "rewards/frontier_coverage_20": 0.14870173409581183, "rewards/frontier_coverage_25": 0.14870173409581183, "rewards/frontier_coverage_5": 0.14870173409581183, "signal/accuracy_reward/centered_abs_mean": 0.23719482421875, "signal/accuracy_reward/group_std_mean": 0.28042069971561434, "signal/accuracy_reward/group_zero_std_frac": 0.31875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.118597412109375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.118597412109375, "signal/advantage_abs_mean": 0.34127363562583923, "signal/advantage_pre_scale_abs_mean": 0.34127363562583923, "signal/advantage_pre_scale_std": 0.40609942078590394, "signal/advantage_std": 0.40609942078590394, "signal/brier_reward/centered_abs_mean": 0.31728167533874513, "signal/brier_reward/group_std_mean": 0.36332341432571413, "signal/brier_reward/group_zero_std_frac": 0.003125, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.031728167459368704, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.031728167459368704, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.29734116792678833, "signal/confidence_uniqueness_reward/group_std_mean": 0.34809759855270384, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029734116792678834, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.029734116792678834, "signal/format_reward/centered_abs_mean": 0.4034423828125, "signal/format_reward/group_std_mean": 0.45386149883270266, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.20172119140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.20172119140625, "signal/frontier_coverage_0/centered_abs_mean": 0.15856626331806184, "signal/frontier_coverage_0/group_std_mean": 0.20916377156972885, "signal/frontier_coverage_0/group_zero_std_frac": 0.003125, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_1/centered_abs_mean": 0.15856626331806184, "signal/frontier_coverage_1/group_std_mean": 0.20916377156972885, "signal/frontier_coverage_1/group_zero_std_frac": 0.003125, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_10/centered_abs_mean": 0.15856626331806184, "signal/frontier_coverage_10/group_std_mean": 0.20916377156972885, "signal/frontier_coverage_10/group_zero_std_frac": 0.003125, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_15/centered_abs_mean": 0.15856626331806184, "signal/frontier_coverage_15/group_std_mean": 0.20916377156972885, "signal/frontier_coverage_15/group_zero_std_frac": 0.003125, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_20/centered_abs_mean": 0.15856626331806184, "signal/frontier_coverage_20/group_std_mean": 0.20916377156972885, "signal/frontier_coverage_20/group_zero_std_frac": 0.003125, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_25/centered_abs_mean": 0.15856626331806184, "signal/frontier_coverage_25/group_std_mean": 0.20916377156972885, "signal/frontier_coverage_25/group_zero_std_frac": 0.003125, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_5/centered_abs_mean": 0.15856626331806184, "signal/frontier_coverage_5/group_std_mean": 0.20916377156972885, "signal/frontier_coverage_5/group_zero_std_frac": 0.003125, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002267497556749731, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002267497556749731, "step": 5 }, { "calibration/aurc": 0.6663210426183143, "calibration/batch_distribution_entropy": 0.6396787062068319, "calibration/buffer_distribution_entropy": 0.6579678305027443, "calibration/confidence_entropy": 0.34138574234262214, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5356770471461247, "calibration/mean_confidence": 0.7990239861496942, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0384765625, "completions/max_length": 1505.0, "completions/max_terminated_length": 1505.0, "completions/mean_length": 207.08623046875, "completions/mean_terminated_length": 215.4260681152344, "completions/min_length": 0.0, "completions/min_terminated_length": 1.8, "epoch": 0.032, "grad_norm": 0.02313925139605999, "learning_rate": 6.249999999999999e-07, "loss": 0.0036, "num_tokens": 34292816.0, "reward": 0.5559734106063843, "reward_std": 0.3749539077281952, "rewards/accuracy_reward": 0.2087890625, "rewards/brier_reward": 0.37693819403648376, "rewards/confidence_uniqueness_reward": 0.5149972319602967, "rewards/format_reward": 0.71552734375, "rewards/frontier_coverage_0": 0.0461702860891819, "rewards/frontier_coverage_1": 0.0461702860891819, "rewards/frontier_coverage_10": 0.0461702860891819, "rewards/frontier_coverage_15": 0.0461702860891819, "rewards/frontier_coverage_20": 0.0461702860891819, "rewards/frontier_coverage_25": 0.0461702860891819, "rewards/frontier_coverage_5": 0.0461702860891819, "signal/accuracy_reward/centered_abs_mean": 0.2244140625, "signal/accuracy_reward/group_std_mean": 0.2725442975759506, "signal/accuracy_reward/group_zero_std_frac": 0.3125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.11220703125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11220703125, "signal/advantage_abs_mean": 0.31165753602981566, "signal/advantage_pre_scale_abs_mean": 0.31165753602981566, "signal/advantage_pre_scale_std": 0.379498028755188, "signal/advantage_std": 0.379498028755188, "signal/brier_reward/centered_abs_mean": 0.30591880679130556, "signal/brier_reward/group_std_mean": 0.35432358980178835, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.030591881647706032, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030591881647706032, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.28595311045646665, "signal/confidence_uniqueness_reward/group_std_mean": 0.342820942401886, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02859531156718731, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02859531156718731, "signal/format_reward/centered_abs_mean": 0.380755615234375, "signal/format_reward/group_std_mean": 0.4403470873832703, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1903778076171875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1903778076171875, "signal/frontier_coverage_0/centered_abs_mean": 0.07552953511476516, "signal/frontier_coverage_0/group_std_mean": 0.125508613884449, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_1/centered_abs_mean": 0.07552953511476516, "signal/frontier_coverage_1/group_std_mean": 0.125508613884449, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_10/centered_abs_mean": 0.07552953511476516, "signal/frontier_coverage_10/group_std_mean": 0.125508613884449, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_15/centered_abs_mean": 0.07552953511476516, "signal/frontier_coverage_15/group_std_mean": 0.125508613884449, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_20/centered_abs_mean": 0.07552953511476516, "signal/frontier_coverage_20/group_std_mean": 0.125508613884449, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_25/centered_abs_mean": 0.07552953511476516, "signal/frontier_coverage_25/group_std_mean": 0.125508613884449, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_5/centered_abs_mean": 0.07552953511476516, "signal/frontier_coverage_5/group_std_mean": 0.125508613884449, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0010800723102875054, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0010800723102875054, "step": 10 }, { "calibration/aurc": 0.6035156031942368, "calibration/batch_distribution_entropy": 0.6349305603027458, "calibration/buffer_distribution_entropy": 0.6554486012564013, "calibration/confidence_entropy": 0.34504651675502557, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.46748896032825493, "calibration/mean_confidence": 0.8060340434356423, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01787109375, "completions/max_length": 1460.4, "completions/max_terminated_length": 1460.4, "completions/mean_length": 174.453125, "completions/mean_terminated_length": 177.67768249511718, "completions/min_length": 0.0, "completions/min_terminated_length": 17.6, "epoch": 0.048, "grad_norm": 0.016342012211680412, "learning_rate": 9.374999999999999e-07, "loss": 0.0069, "num_tokens": 51127952.0, "reward": 0.6984765291213989, "reward_std": 0.28357043862342834, "rewards/accuracy_reward": 0.27353515625, "rewards/brier_reward": 0.4843921780586243, "rewards/confidence_uniqueness_reward": 0.6443804860115051, "rewards/format_reward": 0.8849609375, "rewards/frontier_coverage_0": 0.06344863995909691, "rewards/frontier_coverage_1": 0.06344863995909691, "rewards/frontier_coverage_10": 0.06344863995909691, "rewards/frontier_coverage_15": 0.06344863995909691, "rewards/frontier_coverage_20": 0.06344863995909691, "rewards/frontier_coverage_25": 0.06344863995909691, "rewards/frontier_coverage_5": 0.06344863995909691, "signal/accuracy_reward/centered_abs_mean": 0.205230712890625, "signal/accuracy_reward/group_std_mean": 0.25404774844646455, "signal/accuracy_reward/group_zero_std_frac": 0.340625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1026153564453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1026153564453125, "signal/advantage_abs_mean": 0.20717814862728118, "signal/advantage_pre_scale_abs_mean": 0.20717814862728118, "signal/advantage_pre_scale_std": 0.29353936910629275, "signal/advantage_std": 0.29353936910629275, "signal/brier_reward/centered_abs_mean": 0.2758757948875427, "signal/brier_reward/group_std_mean": 0.33000465035438536, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.02758757919073105, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02758757919073105, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.19257701933383942, "signal/confidence_uniqueness_reward/group_std_mean": 0.2580618649721146, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01925770305097103, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01925770305097103, "signal/format_reward/centered_abs_mean": 0.1892578125, "signal/format_reward/group_std_mean": 0.29168896079063417, "signal/format_reward/group_zero_std_frac": 0.0625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.09462890625, "signal/frontier_coverage_0/centered_abs_mean": 0.09350458979606628, "signal/frontier_coverage_0/group_std_mean": 0.14712692201137542, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_1/centered_abs_mean": 0.09350458979606628, "signal/frontier_coverage_1/group_std_mean": 0.14712692201137542, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_10/centered_abs_mean": 0.09350458979606628, "signal/frontier_coverage_10/group_std_mean": 0.14712692201137542, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_15/centered_abs_mean": 0.09350458979606628, "signal/frontier_coverage_15/group_std_mean": 0.14712692201137542, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_20/centered_abs_mean": 0.09350458979606628, "signal/frontier_coverage_20/group_std_mean": 0.14712692201137542, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_25/centered_abs_mean": 0.09350458979606628, "signal/frontier_coverage_25/group_std_mean": 0.14712692201137542, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_5/centered_abs_mean": 0.09350458979606628, "signal/frontier_coverage_5/group_std_mean": 0.14712692201137542, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013371156295761467, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013371156295761467, "step": 15 }, { "calibration/aurc": 0.5439649409788888, "calibration/batch_distribution_entropy": 0.6677070099797058, "calibration/buffer_distribution_entropy": 0.6549928867820553, "calibration/confidence_entropy": 0.35836314133723945, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4115229909417037, "calibration/mean_confidence": 0.8007890667770411, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0044921875, "completions/max_length": 1172.0, "completions/max_terminated_length": 1172.0, "completions/mean_length": 131.76064453125, "completions/mean_terminated_length": 132.35692749023437, "completions/min_length": 0.0, "completions/min_terminated_length": 31.6, "epoch": 0.064, "grad_norm": 0.0032985019497573376, "learning_rate": 1e-06, "loss": -0.0021, "num_tokens": 67395581.0, "reward": 0.7972017049789428, "reward_std": 0.18594848811626435, "rewards/accuracy_reward": 0.34326171875, "rewards/brier_reward": 0.5591598868370056, "rewards/confidence_uniqueness_reward": 0.7354918837547302, "rewards/format_reward": 0.9814453125, "rewards/frontier_coverage_0": 0.053776346147060394, "rewards/frontier_coverage_1": 0.053776346147060394, "rewards/frontier_coverage_10": 0.053776346147060394, "rewards/frontier_coverage_15": 0.053776346147060394, "rewards/frontier_coverage_20": 0.053776346147060394, "rewards/frontier_coverage_25": 0.053776346147060394, "rewards/frontier_coverage_5": 0.053776346147060394, "signal/accuracy_reward/centered_abs_mean": 0.200811767578125, "signal/accuracy_reward/group_std_mean": 0.2528608232736588, "signal/accuracy_reward/group_zero_std_frac": 0.33125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.1004058837890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1004058837890625, "signal/advantage_abs_mean": 0.13703744262456893, "signal/advantage_pre_scale_abs_mean": 0.13703744262456893, "signal/advantage_pre_scale_std": 0.20460715293884277, "signal/advantage_std": 0.20460715293884277, "signal/brier_reward/centered_abs_mean": 0.23956941366195678, "signal/brier_reward/group_std_mean": 0.295298820734024, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023956941068172456, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023956941068172456, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.1292675703763962, "signal/confidence_uniqueness_reward/group_std_mean": 0.16289211213588714, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.012926757708191872, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012926757708191872, "signal/format_reward/centered_abs_mean": 0.03507080078125, "signal/format_reward/group_std_mean": 0.08618362993001938, "signal/format_reward/group_zero_std_frac": 0.571875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017535400390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017535400390625, "signal/frontier_coverage_0/centered_abs_mean": 0.09282867759466171, "signal/frontier_coverage_0/group_std_mean": 0.14971067011356354, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_1/centered_abs_mean": 0.09282867759466171, "signal/frontier_coverage_1/group_std_mean": 0.14971067011356354, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_10/centered_abs_mean": 0.09282867759466171, "signal/frontier_coverage_10/group_std_mean": 0.14971067011356354, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_15/centered_abs_mean": 0.09282867759466171, "signal/frontier_coverage_15/group_std_mean": 0.14971067011356354, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_20/centered_abs_mean": 0.09282867759466171, "signal/frontier_coverage_20/group_std_mean": 0.14971067011356354, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_25/centered_abs_mean": 0.09282867759466171, "signal/frontier_coverage_25/group_std_mean": 0.14971067011356354, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_5/centered_abs_mean": 0.09282867759466171, "signal/frontier_coverage_5/group_std_mean": 0.14971067011356354, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013274500845000148, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0013274500845000148, "step": 20 }, { "calibration/aurc": 0.6445319959277507, "calibration/batch_distribution_entropy": 0.7455506382256587, "calibration/buffer_distribution_entropy": 0.6783067089120915, "calibration/confidence_entropy": 0.41936467109944864, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4475651051804454, "calibration/mean_confidence": 0.7565154097650527, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 698.6, "completions/max_terminated_length": 698.6, "completions/mean_length": 109.54462890625, "completions/mean_terminated_length": 109.67274932861328, "completions/min_length": 0.0, "completions/min_terminated_length": 36.6, "epoch": 0.08, "grad_norm": 0.006463681813329458, "learning_rate": 1e-06, "loss": -0.0026, "num_tokens": 83450470.0, "reward": 0.8225439548492431, "reward_std": 0.15419970750808715, "rewards/accuracy_reward": 0.36044921875, "rewards/brier_reward": 0.5978257536888123, "rewards/confidence_uniqueness_reward": 0.8047454833984375, "rewards/format_reward": 0.99375, "rewards/frontier_coverage_0": 0.051820401847362516, "rewards/frontier_coverage_1": 0.051820401847362516, "rewards/frontier_coverage_10": 0.051820401847362516, "rewards/frontier_coverage_15": 0.051820401847362516, "rewards/frontier_coverage_20": 0.051820401847362516, "rewards/frontier_coverage_25": 0.051820401847362516, "rewards/frontier_coverage_5": 0.051820401847362516, "signal/accuracy_reward/centered_abs_mean": 0.184283447265625, "signal/accuracy_reward/group_std_mean": 0.2312079608440399, "signal/accuracy_reward/group_zero_std_frac": 0.384375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0921417236328125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0921417236328125, "signal/advantage_abs_mean": 0.11761517375707627, "signal/advantage_pre_scale_abs_mean": 0.11761517375707627, "signal/advantage_pre_scale_std": 0.17681845128536225, "signal/advantage_std": 0.17681845128536225, "signal/brier_reward/centered_abs_mean": 0.2217628002166748, "signal/brier_reward/group_std_mean": 0.27526147961616515, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0221762802451849, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0221762802451849, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07582313790917397, "signal/confidence_uniqueness_reward/group_std_mean": 0.10242749750614166, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007582314219325781, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007582314219325781, "signal/format_reward/centered_abs_mean": 0.012060546875, "signal/format_reward/group_std_mean": 0.03401010446250439, "signal/format_reward/group_zero_std_frac": 0.8125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0060302734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0060302734375, "signal/frontier_coverage_0/centered_abs_mean": 0.11067185401916504, "signal/frontier_coverage_0/group_std_mean": 0.16877196729183197, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_1/centered_abs_mean": 0.11067185401916504, "signal/frontier_coverage_1/group_std_mean": 0.16877196729183197, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_10/centered_abs_mean": 0.11067185401916504, "signal/frontier_coverage_10/group_std_mean": 0.16877196729183197, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_15/centered_abs_mean": 0.11067185401916504, "signal/frontier_coverage_15/group_std_mean": 0.16877196729183197, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_20/centered_abs_mean": 0.11067185401916504, "signal/frontier_coverage_20/group_std_mean": 0.16877196729183197, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_25/centered_abs_mean": 0.11067185401916504, "signal/frontier_coverage_25/group_std_mean": 0.16877196729183197, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_5/centered_abs_mean": 0.11067185401916504, "signal/frontier_coverage_5/group_std_mean": 0.16877196729183197, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001582607487216592, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001582607487216592, "step": 25 }, { "calibration/aurc": 0.6309762117309875, "calibration/batch_distribution_entropy": 0.8369892130203398, "calibration/buffer_distribution_entropy": 0.7564496460395704, "calibration/confidence_entropy": 0.5035340910722915, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.35330436394613157, "calibration/mean_confidence": 0.6835460635754658, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 811.2, "completions/max_terminated_length": 811.2, "completions/mean_length": 111.26328125, "completions/mean_terminated_length": 111.37155609130859, "completions/min_length": 0.0, "completions/min_terminated_length": 41.2, "epoch": 0.096, "grad_norm": 0.00296254875138402, "learning_rate": 1e-06, "loss": -0.002, "num_tokens": 99634414.0, "reward": 0.8360645532608032, "reward_std": 0.13758190870285034, "rewards/accuracy_reward": 0.36787109375, "rewards/brier_reward": 0.6485287547111511, "rewards/confidence_uniqueness_reward": 0.8281476020812988, "rewards/format_reward": 0.99609375, "rewards/frontier_coverage_0": 0.064080910384655, "rewards/frontier_coverage_1": 0.064080910384655, "rewards/frontier_coverage_10": 0.064080910384655, "rewards/frontier_coverage_15": 0.064080910384655, "rewards/frontier_coverage_20": 0.064080910384655, "rewards/frontier_coverage_25": 0.064080910384655, "rewards/frontier_coverage_5": 0.064080910384655, "signal/accuracy_reward/centered_abs_mean": 0.167340087890625, "signal/accuracy_reward/group_std_mean": 0.21798568665981294, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0836700439453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0836700439453125, "signal/advantage_abs_mean": 0.1038191020488739, "signal/advantage_pre_scale_abs_mean": 0.1038191020488739, "signal/advantage_pre_scale_std": 0.15961573123931885, "signal/advantage_std": 0.15961573123931885, "signal/brier_reward/centered_abs_mean": 0.20211856961250305, "signal/brier_reward/group_std_mean": 0.24915991723537445, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020211857557296754, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020211857557296754, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0711653858423233, "signal/confidence_uniqueness_reward/group_std_mean": 0.09434090554714203, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.007116538938134909, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007116538938134909, "signal/format_reward/centered_abs_mean": 0.0074951171875, "signal/format_reward/group_std_mean": 0.020079236291348935, "signal/format_reward/group_zero_std_frac": 0.89375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00374755859375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00374755859375, "signal/frontier_coverage_0/centered_abs_mean": 0.13252932876348494, "signal/frontier_coverage_0/group_std_mean": 0.1887018859386444, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_1/centered_abs_mean": 0.13252932876348494, "signal/frontier_coverage_1/group_std_mean": 0.1887018859386444, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_10/centered_abs_mean": 0.13252932876348494, "signal/frontier_coverage_10/group_std_mean": 0.1887018859386444, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_15/centered_abs_mean": 0.13252932876348494, "signal/frontier_coverage_15/group_std_mean": 0.1887018859386444, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_20/centered_abs_mean": 0.13252932876348494, "signal/frontier_coverage_20/group_std_mean": 0.1887018859386444, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_25/centered_abs_mean": 0.13252932876348494, "signal/frontier_coverage_25/group_std_mean": 0.1887018859386444, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_5/centered_abs_mean": 0.13252932876348494, "signal/frontier_coverage_5/group_std_mean": 0.1887018859386444, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018951693316921591, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018951693316921591, "step": 30 }, { "calibration/aurc": 0.4826404430899339, "calibration/batch_distribution_entropy": 0.8675797481644141, "calibration/buffer_distribution_entropy": 0.8366443607115505, "calibration/confidence_entropy": 0.5551013133970907, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.009025816552062868, "calibration/coverage@25%": 0.009811671168958742, "calibration/coverage@30%": 0.03527587992937305, "calibration/coverage@5%": 0.0, "calibration/ece": 0.19176894753818877, "calibration/mean_confidence": 0.6017602714878476, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 531.8, "completions/max_terminated_length": 531.8, "completions/mean_length": 119.00185546875, "completions/mean_terminated_length": 119.04827575683593, "completions/min_length": 18.2, "completions/min_terminated_length": 44.0, "epoch": 0.112, "grad_norm": 0.0017838370986282825, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 115962465.0, "reward": 0.8670443058013916, "reward_std": 0.13026245534420014, "rewards/accuracy_reward": 0.41376953125, "rewards/brier_reward": 0.7080369591712952, "rewards/confidence_uniqueness_reward": 0.8345065712928772, "rewards/format_reward": 0.99775390625, "rewards/frontier_coverage_0": 0.07021187543869019, "rewards/frontier_coverage_1": 0.07021187543869019, "rewards/frontier_coverage_10": 0.07021187543869019, "rewards/frontier_coverage_15": 0.07021187543869019, "rewards/frontier_coverage_20": 0.07021187543869019, "rewards/frontier_coverage_25": 0.07021187543869019, "rewards/frontier_coverage_5": 0.07021187543869019, "signal/accuracy_reward/centered_abs_mean": 0.171795654296875, "signal/accuracy_reward/group_std_mean": 0.22035529017448424, "signal/accuracy_reward/group_zero_std_frac": 0.39375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0858978271484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0858978271484375, "signal/advantage_abs_mean": 0.10090952962636948, "signal/advantage_pre_scale_abs_mean": 0.10090952962636948, "signal/advantage_pre_scale_std": 0.15009926557540892, "signal/advantage_std": 0.15009926557540892, "signal/brier_reward/centered_abs_mean": 0.17961066961288452, "signal/brier_reward/group_std_mean": 0.2244138687849045, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017961067706346513, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017961067706346513, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09093757271766663, "signal/confidence_uniqueness_reward/group_std_mean": 0.1149788647890091, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.009093757718801498, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009093757718801498, "signal/format_reward/centered_abs_mean": 0.004351806640625, "signal/format_reward/group_std_mean": 0.012705824617296458, "signal/format_reward/group_zero_std_frac": 0.928125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0021759033203125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021759033203125, "signal/frontier_coverage_0/centered_abs_mean": 0.17380160391330718, "signal/frontier_coverage_0/group_std_mean": 0.2313144624233246, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_1/centered_abs_mean": 0.17380160391330718, "signal/frontier_coverage_1/group_std_mean": 0.2313144624233246, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_10/centered_abs_mean": 0.17380160391330718, "signal/frontier_coverage_10/group_std_mean": 0.2313144624233246, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_15/centered_abs_mean": 0.17380160391330718, "signal/frontier_coverage_15/group_std_mean": 0.2313144624233246, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_20/centered_abs_mean": 0.17380160391330718, "signal/frontier_coverage_20/group_std_mean": 0.2313144624233246, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_25/centered_abs_mean": 0.17380160391330718, "signal/frontier_coverage_25/group_std_mean": 0.2313144624233246, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_5/centered_abs_mean": 0.17380160391330718, "signal/frontier_coverage_5/group_std_mean": 0.2313144624233246, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00248536285944283, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00248536285944283, "step": 35 }, { "calibration/aurc": 0.5205592590994277, "calibration/batch_distribution_entropy": 0.8709230881186457, "calibration/buffer_distribution_entropy": 0.880652703907203, "calibration/confidence_entropy": 0.5875897956550009, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.01761252446183953, "calibration/coverage@5%": 0.0, "calibration/ece": 0.1345443869115383, "calibration/mean_confidence": 0.48074756249087625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 429.2, "completions/max_terminated_length": 429.2, "completions/mean_length": 127.03271484375, "completions/mean_terminated_length": 127.1189453125, "completions/min_length": 10.4, "completions/min_terminated_length": 48.6, "epoch": 0.128, "grad_norm": 0.0014243983896449208, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 132179952.0, "reward": 0.87330641746521, "reward_std": 0.11022595316171646, "rewards/accuracy_reward": 0.412109375, "rewards/brier_reward": 0.7398777008056641, "rewards/confidence_uniqueness_reward": 0.8477198123931885, "rewards/format_reward": 0.99873046875, "rewards/frontier_coverage_0": 0.09117618799209595, "rewards/frontier_coverage_1": 0.09117618799209595, "rewards/frontier_coverage_10": 0.09117618799209595, "rewards/frontier_coverage_15": 0.09117618799209595, "rewards/frontier_coverage_20": 0.09117618799209595, "rewards/frontier_coverage_25": 0.09117618799209595, "rewards/frontier_coverage_5": 0.09117618799209595, "signal/accuracy_reward/centered_abs_mean": 0.15081787109375, "signal/accuracy_reward/group_std_mean": 0.19553602039813994, "signal/accuracy_reward/group_zero_std_frac": 0.453125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.075408935546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.075408935546875, "signal/advantage_abs_mean": 0.08564974516630172, "signal/advantage_pre_scale_abs_mean": 0.08564974516630172, "signal/advantage_pre_scale_std": 0.1290176823735237, "signal/advantage_std": 0.1290176823735237, "signal/brier_reward/centered_abs_mean": 0.16081323921680452, "signal/brier_reward/group_std_mean": 0.2026536852121353, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01608132477849722, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01608132477849722, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08570207059383392, "signal/confidence_uniqueness_reward/group_std_mean": 0.10545764565467834, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00857020691037178, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00857020691037178, "signal/format_reward/centered_abs_mean": 0.002459716796875, "signal/format_reward/group_std_mean": 0.007181552890688181, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375, "signal/frontier_coverage_0/centered_abs_mean": 0.20869247913360595, "signal/frontier_coverage_0/group_std_mean": 0.26470946073532103, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_1/centered_abs_mean": 0.20869247913360595, "signal/frontier_coverage_1/group_std_mean": 0.26470946073532103, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_10/centered_abs_mean": 0.20869247913360595, "signal/frontier_coverage_10/group_std_mean": 0.26470946073532103, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_15/centered_abs_mean": 0.20869247913360595, "signal/frontier_coverage_15/group_std_mean": 0.26470946073532103, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_20/centered_abs_mean": 0.20869247913360595, "signal/frontier_coverage_20/group_std_mean": 0.26470946073532103, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_25/centered_abs_mean": 0.20869247913360595, "signal/frontier_coverage_25/group_std_mean": 0.26470946073532103, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_5/centered_abs_mean": 0.20869247913360595, "signal/frontier_coverage_5/group_std_mean": 0.26470946073532103, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002984302304685116, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002984302304685116, "step": 40 }, { "calibration/aurc": 0.3661014364381845, "calibration/batch_distribution_entropy": 0.8654614858918537, "calibration/buffer_distribution_entropy": 0.8948939325024583, "calibration/confidence_entropy": 0.5692098412044415, "calibration/coverage@0%": 0.007436399217221135, "calibration/coverage@1%": 0.007436399217221135, "calibration/coverage@10%": 0.03444227005870841, "calibration/coverage@15%": 0.05870841487279843, "calibration/coverage@20%": 0.09041095890410958, "calibration/coverage@25%": 0.213695572407045, "calibration/coverage@30%": 0.30481286692759296, "calibration/coverage@5%": 0.007436399217221135, "calibration/ece": 0.23127731773099808, "calibration/mean_confidence": 0.40613134023133063, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 361.4, "completions/max_terminated_length": 361.4, "completions/mean_length": 134.53935546875, "completions/mean_terminated_length": 134.60521240234374, "completions/min_length": 11.0, "completions/min_terminated_length": 52.2, "epoch": 0.144, "grad_norm": 0.001966482726857066, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 148508067.0, "reward": 0.9146429896354675, "reward_std": 0.10571834594011306, "rewards/accuracy_reward": 0.5076171875, "rewards/brier_reward": 0.7293356657028198, "rewards/confidence_uniqueness_reward": 0.8530200600624085, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.031327979266643526, "rewards/frontier_coverage_1": 0.031327979266643526, "rewards/frontier_coverage_10": 0.031327979266643526, "rewards/frontier_coverage_15": 0.031327979266643526, "rewards/frontier_coverage_20": 0.031327979266643526, "rewards/frontier_coverage_25": 0.031327979266643526, "rewards/frontier_coverage_5": 0.031327979266643526, "signal/accuracy_reward/centered_abs_mean": 0.15548095703125, "signal/accuracy_reward/group_std_mean": 0.20547467172145845, "signal/accuracy_reward/group_zero_std_frac": 0.4125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.077740478515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.077740478515625, "signal/advantage_abs_mean": 0.08054517805576325, "signal/advantage_pre_scale_abs_mean": 0.08054517805576325, "signal/advantage_pre_scale_std": 0.12199195474386215, "signal/advantage_std": 0.12199195474386215, "signal/brier_reward/centered_abs_mean": 0.16138457357883454, "signal/brier_reward/group_std_mean": 0.20098112821578978, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016138457320630552, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016138457320630552, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08552988022565841, "signal/confidence_uniqueness_reward/group_std_mean": 0.10412109196186066, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008552988339215518, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008552988339215518, "signal/format_reward/centered_abs_mean": 0.002069091796875, "signal/format_reward/group_std_mean": 0.005740390438586473, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375, "signal/frontier_coverage_0/centered_abs_mean": 0.23797725439071654, "signal/frontier_coverage_0/group_std_mean": 0.2977425754070282, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_1/centered_abs_mean": 0.23797725439071654, "signal/frontier_coverage_1/group_std_mean": 0.2977425754070282, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_10/centered_abs_mean": 0.23797725439071654, "signal/frontier_coverage_10/group_std_mean": 0.2977425754070282, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_15/centered_abs_mean": 0.23797725439071654, "signal/frontier_coverage_15/group_std_mean": 0.2977425754070282, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_20/centered_abs_mean": 0.23797725439071654, "signal/frontier_coverage_20/group_std_mean": 0.2977425754070282, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_25/centered_abs_mean": 0.23797725439071654, "signal/frontier_coverage_25/group_std_mean": 0.2977425754070282, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_5/centered_abs_mean": 0.23797725439071654, "signal/frontier_coverage_5/group_std_mean": 0.2977425754070282, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034030748065561056, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034030748065561056, "step": 45 }, { "calibration/aurc": 0.42311159847761515, "calibration/batch_distribution_entropy": 0.8411299022417736, "calibration/buffer_distribution_entropy": 0.8770051162966203, "calibration/confidence_entropy": 0.5590246418593998, "calibration/coverage@0%": 0.0074310481898238745, "calibration/coverage@1%": 0.0074310481898238745, "calibration/coverage@10%": 0.014476057974559687, "calibration/coverage@15%": 0.02190710616438356, "calibration/coverage@20%": 0.029335861056751466, "calibration/coverage@25%": 0.04691398605675147, "calibration/coverage@30%": 0.1594415056262231, "calibration/coverage@5%": 0.0074310481898238745, "calibration/ece": 0.12798997594433206, "calibration/mean_confidence": 0.3582420598922628, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 459.4, "completions/max_terminated_length": 459.4, "completions/mean_length": 146.5173828125, "completions/mean_terminated_length": 146.54682312011718, "completions/min_length": 34.2, "completions/min_terminated_length": 59.8, "epoch": 0.16, "grad_norm": 0.0011785045498982072, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 165029333.0, "reward": 0.8984348177909851, "reward_std": 0.0986061379313469, "rewards/accuracy_reward": 0.4591796875, "rewards/brier_reward": 0.748118782043457, "rewards/confidence_uniqueness_reward": 0.854450786113739, "rewards/format_reward": 0.999609375, "rewards/frontier_coverage_0": 0.088801159709692, "rewards/frontier_coverage_1": 0.088801159709692, "rewards/frontier_coverage_10": 0.088801159709692, "rewards/frontier_coverage_15": 0.088801159709692, "rewards/frontier_coverage_20": 0.08754920139908791, "rewards/frontier_coverage_25": 0.08266227170825005, "rewards/frontier_coverage_5": 0.088801159709692, "signal/accuracy_reward/centered_abs_mean": 0.15191650390625, "signal/accuracy_reward/group_std_mean": 0.19589066207408906, "signal/accuracy_reward/group_zero_std_frac": 0.453125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.075958251953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.075958251953125, "signal/advantage_abs_mean": 0.07748261094093323, "signal/advantage_pre_scale_abs_mean": 0.07748261094093323, "signal/advantage_pre_scale_std": 0.11596368849277497, "signal/advantage_std": 0.11596368849277497, "signal/brier_reward/centered_abs_mean": 0.15048568248748778, "signal/brier_reward/group_std_mean": 0.18865303993225097, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015048568695783615, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015048568695783615, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0896127089858055, "signal/confidence_uniqueness_reward/group_std_mean": 0.11197478771209717, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.008961271494626999, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008961271494626999, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_coverage_0/centered_abs_mean": 0.24175618290901185, "signal/frontier_coverage_0/group_std_mean": 0.30336724519729613, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_1/centered_abs_mean": 0.24175618290901185, "signal/frontier_coverage_1/group_std_mean": 0.30336724519729613, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_10/centered_abs_mean": 0.24175618290901185, "signal/frontier_coverage_10/group_std_mean": 0.30336724519729613, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_15/centered_abs_mean": 0.24175618290901185, "signal/frontier_coverage_15/group_std_mean": 0.30336724519729613, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_20/centered_abs_mean": 0.23931825459003447, "signal/frontier_coverage_20/group_std_mean": 0.3001833617687225, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034222510643303395, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034222510643303395, "signal/frontier_coverage_25/centered_abs_mean": 0.2265275925397873, "signal/frontier_coverage_25/group_std_mean": 0.2843614399433136, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0032393445260822775, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0032393445260822775, "signal/frontier_coverage_5/centered_abs_mean": 0.24175618290901185, "signal/frontier_coverage_5/group_std_mean": 0.30336724519729613, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003457113401964307, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003457113401964307, "step": 50 }, { "epoch": 0.16, "eval_calibration/aurc": 0.5840922316708607, "eval_calibration/batch_distribution_entropy": 0.7662788198859265, "eval_calibration/buffer_distribution_entropy": 0.8621926897802064, "eval_calibration/confidence_entropy": 0.5495364859557058, "eval_calibration/coverage@0%": 0.0625, "eval_calibration/coverage@1%": 0.0625, "eval_calibration/coverage@10%": 0.0625, "eval_calibration/coverage@15%": 0.0625, "eval_calibration/coverage@20%": 0.0703125, "eval_calibration/coverage@25%": 0.0703125, "eval_calibration/coverage@30%": 0.109375, "eval_calibration/coverage@5%": 0.0625, "eval_calibration/ece": 0.18527343750000003, "eval_calibration/mean_confidence": 0.35605468749999997, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 322.0, "eval_completions/max_terminated_length": 322.0, "eval_completions/mean_length": 155.77276229858398, "eval_completions/mean_terminated_length": 155.77276229858398, "eval_completions/min_length": 84.0, "eval_completions/min_terminated_length": 84.0, "eval_loss": 0.0, "eval_num_tokens": 165029333.0, "eval_reward": 0.8566556125879288, "eval_reward_std": 0.19755794480443, "eval_rewards/accuracy_reward": 0.3671875, "eval_rewards/brier_reward": 0.7759375870227814, "eval_rewards/confidence_uniqueness_reward": 0.7936696708202362, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_coverage_0": 0.1754572968930006, "eval_rewards/frontier_coverage_1": 0.1754572968930006, "eval_rewards/frontier_coverage_10": 0.1754572968930006, "eval_rewards/frontier_coverage_15": 0.17472604848444462, "eval_rewards/frontier_coverage_20": 0.167778592556715, "eval_rewards/frontier_coverage_25": 0.14990984462201595, "eval_rewards/frontier_coverage_5": 0.1754572968930006, "eval_runtime": 17.1563, "eval_samples_per_second": 29.144, "eval_signal/accuracy_reward/centered_abs_mean": 0.44873046875, "eval_signal/accuracy_reward/group_std_mean": 0.48020416498184204, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.224365234375, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.224365234375, "eval_signal/advantage_abs_mean": 0.1783592328429222, "eval_signal/advantage_pre_scale_abs_mean": 0.1783592328429222, "eval_signal/advantage_pre_scale_std": 0.19551436230540276, "eval_signal/advantage_std": 0.19551436230540276, "eval_signal/brier_reward/centered_abs_mean": 0.17557190358638763, "eval_signal/brier_reward/group_std_mean": 0.22150396928191185, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01755719119682908, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01755719119682908, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.1074361503124237, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12650343775749207, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.010743614984676242, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010743614984676242, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3991696313023567, "eval_signal/frontier_coverage_0/group_std_mean": 0.46928417682647705, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0057081254199147224, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0057081254199147224, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3991696313023567, "eval_signal/frontier_coverage_1/group_std_mean": 0.46928417682647705, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0057081254199147224, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0057081254199147224, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3991696313023567, "eval_signal/frontier_coverage_10/group_std_mean": 0.46928417682647705, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0057081254199147224, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0057081254199147224, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3979253023862839, "eval_signal/frontier_coverage_15/group_std_mean": 0.46783114969730377, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005690331454388797, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005690331454388797, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3867493271827698, "eval_signal/frontier_coverage_20/group_std_mean": 0.4546607509255409, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005530515452846885, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005530515452846885, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3621833994984627, "eval_signal/frontier_coverage_25/group_std_mean": 0.42486173659563065, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005179222673177719, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005179222673177719, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.3991696313023567, "eval_signal/frontier_coverage_5/group_std_mean": 0.46928417682647705, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0057081254199147224, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0057081254199147224, "eval_steps_per_second": 0.233, "step": 50 }, { "calibration/aurc": 0.4252549136754009, "calibration/batch_distribution_entropy": 0.9125482359893882, "calibration/buffer_distribution_entropy": 0.8660492759848346, "calibration/confidence_entropy": 0.5548617338405014, "calibration/coverage@0%": 0.005087298189823874, "calibration/coverage@1%": 0.005087298189823874, "calibration/coverage@10%": 0.0054786876223091975, "calibration/coverage@15%": 0.0054786876223091975, "calibration/coverage@20%": 0.0054786876223091975, "calibration/coverage@25%": 0.06684809197651663, "calibration/coverage@30%": 0.17233824608610568, "calibration/coverage@5%": 0.005087298189823874, "calibration/ece": 0.17347517265132364, "calibration/mean_confidence": 0.40544332355709123, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 453.6, "completions/max_terminated_length": 453.6, "completions/mean_length": 159.16650390625, "completions/mean_terminated_length": 159.22799682617188, "completions/min_length": 28.4, "completions/min_terminated_length": 66.8, "epoch": 0.176, "grad_norm": 0.0013124002143740654, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 181896318.0, "reward": 0.8989975333213807, "reward_std": 0.10167965888977051, "rewards/accuracy_reward": 0.45263671875, "rewards/brier_reward": 0.7518372178077698, "rewards/confidence_uniqueness_reward": 0.8766632199287414, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.10452535524964332, "rewards/frontier_coverage_1": 0.10452535524964332, "rewards/frontier_coverage_10": 0.10452535524964332, "rewards/frontier_coverage_15": 0.10412113443017006, "rewards/frontier_coverage_20": 0.10193085297942162, "rewards/frontier_coverage_25": 0.09392795115709304, "rewards/frontier_coverage_5": 0.10452535524964332, "signal/accuracy_reward/centered_abs_mean": 0.151483154296875, "signal/accuracy_reward/group_std_mean": 0.1968166172504425, "signal/accuracy_reward/group_zero_std_frac": 0.45, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0757415771484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0757415771484375, "signal/advantage_abs_mean": 0.07838663980364799, "signal/advantage_pre_scale_abs_mean": 0.07838663980364799, "signal/advantage_pre_scale_std": 0.11850336343050002, "signal/advantage_std": 0.11850336343050002, "signal/brier_reward/centered_abs_mean": 0.1553162842988968, "signal/brier_reward/group_std_mean": 0.19515422284603118, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01553162857890129, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01553162857890129, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06829209327697754, "signal/confidence_uniqueness_reward/group_std_mean": 0.0883951410651207, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.006829209346324206, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006829209346324206, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844470128417, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_coverage_0/centered_abs_mean": 0.24396900236606597, "signal/frontier_coverage_0/group_std_mean": 0.30476749539375303, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0034887567162513733, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0034887567162513733, "signal/frontier_coverage_1/centered_abs_mean": 0.24396900236606597, "signal/frontier_coverage_1/group_std_mean": 0.30476749539375303, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0034887567162513733, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0034887567162513733, "signal/frontier_coverage_10/centered_abs_mean": 0.24396900236606597, "signal/frontier_coverage_10/group_std_mean": 0.30476749539375303, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0034887567162513733, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034887567162513733, "signal/frontier_coverage_15/centered_abs_mean": 0.24291383922100068, "signal/frontier_coverage_15/group_std_mean": 0.30346350073814393, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003473667986690998, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003473667986690998, "signal/frontier_coverage_20/centered_abs_mean": 0.23362279534339905, "signal/frontier_coverage_20/group_std_mean": 0.29205312132835387, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003340805834159255, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003340805834159255, "signal/frontier_coverage_25/centered_abs_mean": 0.1957554578781128, "signal/frontier_coverage_25/group_std_mean": 0.24552590250968934, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002799303038045764, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002799303038045764, "signal/frontier_coverage_5/centered_abs_mean": 0.24396900236606597, "signal/frontier_coverage_5/group_std_mean": 0.30476749539375303, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0034887567162513733, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034887567162513733, "step": 55 }, { "calibration/aurc": 0.3504090600956439, "calibration/batch_distribution_entropy": 0.9551707663234101, "calibration/buffer_distribution_entropy": 0.9138769951806276, "calibration/confidence_entropy": 0.5300997472761269, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.001171875, "calibration/coverage@15%": 0.027734375, "calibration/coverage@20%": 0.101171875, "calibration/coverage@25%": 0.199609375, "calibration/coverage@30%": 0.33515625, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.106851484375, "calibration/mean_confidence": 0.44491546875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 492.4, "completions/max_terminated_length": 492.4, "completions/mean_length": 169.7375, "completions/mean_terminated_length": 169.7375, "completions/min_length": 75.8, "completions/min_terminated_length": 75.8, "epoch": 0.192, "grad_norm": 0.0010296102846041322, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 198449246.0, "reward": 0.915556812286377, "reward_std": 0.09649229347705841, "rewards/accuracy_reward": 0.48076171875, "rewards/brier_reward": 0.7584408164024353, "rewards/confidence_uniqueness_reward": 0.9000682353973388, "rewards/format_reward": 0.99970703125, "rewards/frontier_coverage_0": 0.09730687141418456, "rewards/frontier_coverage_1": 0.09730687141418456, "rewards/frontier_coverage_10": 0.09755967259407043, "rewards/frontier_coverage_15": 0.09740926474332809, "rewards/frontier_coverage_20": 0.09416202008724213, "rewards/frontier_coverage_25": 0.08129325956106186, "rewards/frontier_coverage_5": 0.09730687141418456, "signal/accuracy_reward/centered_abs_mean": 0.134930419921875, "signal/accuracy_reward/group_std_mean": 0.17711263298988342, "signal/accuracy_reward/group_zero_std_frac": 0.496875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0674652099609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0674652099609375, "signal/advantage_abs_mean": 0.07492515221238136, "signal/advantage_pre_scale_abs_mean": 0.07492515221238136, "signal/advantage_pre_scale_std": 0.1148703083395958, "signal/advantage_std": 0.1148703083395958, "signal/brier_reward/centered_abs_mean": 0.1652140736579895, "signal/brier_reward/group_std_mean": 0.20634177327156067, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01652140785008669, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01652140785008669, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.047895267605781555, "signal/confidence_uniqueness_reward/group_std_mean": 0.0618466705083847, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004789526853710413, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004789526853710413, "signal/format_reward/centered_abs_mean": 0.000567626953125, "signal/format_reward/group_std_mean": 0.0016572814900428056, "signal/format_reward/group_zero_std_frac": 0.990625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0002838134765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002838134765625, "signal/frontier_coverage_0/centered_abs_mean": 0.2331833630800247, "signal/frontier_coverage_0/group_std_mean": 0.2929345488548279, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003334522061049938, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003334522061049938, "signal/frontier_coverage_1/centered_abs_mean": 0.2331833630800247, "signal/frontier_coverage_1/group_std_mean": 0.2929345488548279, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003334522061049938, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003334522061049938, "signal/frontier_coverage_10/centered_abs_mean": 0.23196647465229034, "signal/frontier_coverage_10/group_std_mean": 0.29145088195800783, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033171205781400205, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033171205781400205, "signal/frontier_coverage_15/centered_abs_mean": 0.2315136820077896, "signal/frontier_coverage_15/group_std_mean": 0.29087979793548585, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003310645651072264, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003310645651072264, "signal/frontier_coverage_20/centered_abs_mean": 0.21936435103416443, "signal/frontier_coverage_20/group_std_mean": 0.2758824825286865, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031369101721793412, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031369101721793412, "signal/frontier_coverage_25/centered_abs_mean": 0.18077392876148224, "signal/frontier_coverage_25/group_std_mean": 0.22790210247039794, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025850672274827955, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025850672274827955, "signal/frontier_coverage_5/centered_abs_mean": 0.2331833630800247, "signal/frontier_coverage_5/group_std_mean": 0.2929345488548279, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003334522061049938, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003334522061049938, "step": 60 }, { "calibration/aurc": 0.30815469042987864, "calibration/batch_distribution_entropy": 0.9787380668407992, "calibration/buffer_distribution_entropy": 0.9640332476588573, "calibration/confidence_entropy": 0.5174168360083795, "calibration/coverage@0%": 0.0031288221624266145, "calibration/coverage@1%": 0.0031288221624266145, "calibration/coverage@10%": 0.03282855308219178, "calibration/coverage@15%": 0.14299474070450097, "calibration/coverage@20%": 0.3109780149217221, "calibration/coverage@25%": 0.43832788038160475, "calibration/coverage@30%": 0.5622477372798435, "calibration/coverage@5%": 0.0031288221624266145, "calibration/ece": 0.15338373930208382, "calibration/mean_confidence": 0.4815208716065659, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00029296875, "completions/max_length": 477.6, "completions/max_terminated_length": 477.6, "completions/mean_length": 178.60751953125, "completions/mean_terminated_length": 178.66034545898438, "completions/min_length": 45.8, "completions/min_terminated_length": 77.6, "epoch": 0.208, "grad_norm": 0.0010420128237456083, "learning_rate": 1e-06, "loss": 0.0004, "num_tokens": 215310411.0, "reward": 0.9366827011108398, "reward_std": 0.10523568391799927, "rewards/accuracy_reward": 0.525, "rewards/brier_reward": 0.7601073861122132, "rewards/confidence_uniqueness_reward": 0.910399055480957, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.07539150714874268, "rewards/frontier_coverage_1": 0.07539150714874268, "rewards/frontier_coverage_10": 0.07539150714874268, "rewards/frontier_coverage_15": 0.07539150714874268, "rewards/frontier_coverage_20": 0.07489909082651139, "rewards/frontier_coverage_25": 0.06396199613809586, "rewards/frontier_coverage_5": 0.07539150714874268, "signal/accuracy_reward/centered_abs_mean": 0.14537353515625, "signal/accuracy_reward/group_std_mean": 0.194059419631958, "signal/accuracy_reward/group_zero_std_frac": 0.44375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.072686767578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.072686767578125, "signal/advantage_abs_mean": 0.0802029699087143, "signal/advantage_pre_scale_abs_mean": 0.0802029699087143, "signal/advantage_pre_scale_std": 0.12353497147560119, "signal/advantage_std": 0.12353497147560119, "signal/brier_reward/centered_abs_mean": 0.17113058865070344, "signal/brier_reward/group_std_mean": 0.2146961957216263, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017113059386610986, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017113059386610986, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042478848993778226, "signal/confidence_uniqueness_reward/group_std_mean": 0.05312614291906357, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0042478849180042745, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042478849180042745, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_coverage_0/centered_abs_mean": 0.23045052886009215, "signal/frontier_coverage_0/group_std_mean": 0.2944092571735382, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_1/centered_abs_mean": 0.23045052886009215, "signal/frontier_coverage_1/group_std_mean": 0.2944092571735382, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_10/centered_abs_mean": 0.23045052886009215, "signal/frontier_coverage_10/group_std_mean": 0.2944092571735382, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_15/centered_abs_mean": 0.23045052886009215, "signal/frontier_coverage_15/group_std_mean": 0.2944092571735382, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_20/centered_abs_mean": 0.2168228805065155, "signal/frontier_coverage_20/group_std_mean": 0.27752563655376433, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031005671713501214, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031005671713501214, "signal/frontier_coverage_25/centered_abs_mean": 0.1687493294477463, "signal/frontier_coverage_25/group_std_mean": 0.2179758220911026, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002413115510717034, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002413115510717034, "signal/frontier_coverage_5/centered_abs_mean": 0.23045052886009215, "signal/frontier_coverage_5/group_std_mean": 0.2944092571735382, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003295442508533597, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003295442508533597, "step": 65 }, { "calibration/aurc": 0.33759827970691464, "calibration/batch_distribution_entropy": 0.9855316102239617, "calibration/buffer_distribution_entropy": 0.9834165238932998, "calibration/confidence_entropy": 0.48700275597752746, "calibration/coverage@0%": 0.008609830062161851, "calibration/coverage@1%": 0.008609830062161851, "calibration/coverage@10%": 0.08637478236157477, "calibration/coverage@15%": 0.1509052529402172, "calibration/coverage@20%": 0.2587980183511761, "calibration/coverage@25%": 0.3334923262969571, "calibration/coverage@30%": 0.46375408896435283, "calibration/coverage@5%": 0.02150045506216185, "calibration/ece": 0.16715913862447945, "calibration/mean_confidence": 0.4651282833566417, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 871.4, "completions/max_terminated_length": 871.4, "completions/mean_length": 182.4677734375, "completions/mean_terminated_length": 182.62802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 80.8, "epoch": 0.224, "grad_norm": 0.0008849430014379323, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 232332065.0, "reward": 0.9144545674324036, "reward_std": 0.10017272233963012, "rewards/accuracy_reward": 0.47197265625, "rewards/brier_reward": 0.7604376077651978, "rewards/confidence_uniqueness_reward": 0.9159923315048217, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.1188489928841591, "rewards/frontier_coverage_1": 0.1188489928841591, "rewards/frontier_coverage_10": 0.1188489928841591, "rewards/frontier_coverage_15": 0.1186547577381134, "rewards/frontier_coverage_20": 0.11327697336673737, "rewards/frontier_coverage_25": 0.08724054023623466, "rewards/frontier_coverage_5": 0.1188489928841591, "signal/accuracy_reward/centered_abs_mean": 0.126641845703125, "signal/accuracy_reward/group_std_mean": 0.16965427994728088, "signal/accuracy_reward/group_zero_std_frac": 0.509375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0633209228515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0633209228515625, "signal/advantage_abs_mean": 0.07543377876281739, "signal/advantage_pre_scale_abs_mean": 0.07543377876281739, "signal/advantage_pre_scale_std": 0.11968540549278259, "signal/advantage_std": 0.11968540549278259, "signal/brier_reward/centered_abs_mean": 0.1779782146215439, "signal/brier_reward/group_std_mean": 0.22407877445220947, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017797821387648582, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017797821387648582, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04430801272392273, "signal/confidence_uniqueness_reward/group_std_mean": 0.055218780785799025, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004430801328271628, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004430801328271628, "signal/format_reward/centered_abs_mean": 0.002069091796875, "signal/format_reward/group_std_mean": 0.005740390345454216, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375, "signal/frontier_coverage_0/centered_abs_mean": 0.22609589099884034, "signal/frontier_coverage_0/group_std_mean": 0.2900040984153748, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032331712543964388, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032331712543964388, "signal/frontier_coverage_1/centered_abs_mean": 0.22609589099884034, "signal/frontier_coverage_1/group_std_mean": 0.2900040984153748, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032331712543964388, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032331712543964388, "signal/frontier_coverage_10/centered_abs_mean": 0.22609589099884034, "signal/frontier_coverage_10/group_std_mean": 0.2900040984153748, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032331712543964388, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032331712543964388, "signal/frontier_coverage_15/centered_abs_mean": 0.2218073219060898, "signal/frontier_coverage_15/group_std_mean": 0.2847987115383148, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003171844594180584, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003171844594180584, "signal/frontier_coverage_20/centered_abs_mean": 0.2043167382478714, "signal/frontier_coverage_20/group_std_mean": 0.2630573481321335, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029217293485999107, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029217293485999107, "signal/frontier_coverage_25/centered_abs_mean": 0.15411110818386078, "signal/frontier_coverage_25/group_std_mean": 0.20011564791202546, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002203788794577122, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002203788794577122, "signal/frontier_coverage_5/centered_abs_mean": 0.22609589099884034, "signal/frontier_coverage_5/group_std_mean": 0.2900040984153748, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032331712543964388, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032331712543964388, "step": 70 }, { "calibration/aurc": 0.362449931517634, "calibration/batch_distribution_entropy": 0.9679039549098876, "calibration/buffer_distribution_entropy": 0.9911512020196188, "calibration/confidence_entropy": 0.4805309846266669, "calibration/coverage@0%": 0.006640625, "calibration/coverage@1%": 0.006640625, "calibration/coverage@10%": 0.113671875, "calibration/coverage@15%": 0.168359375, "calibration/coverage@20%": 0.2296875, "calibration/coverage@25%": 0.269921875, "calibration/coverage@30%": 0.325390625, "calibration/coverage@5%": 0.082421875, "calibration/ece": 0.17709150255198303, "calibration/mean_confidence": 0.4958924159529189, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 655.4, "completions/max_terminated_length": 655.4, "completions/mean_length": 187.84072265625, "completions/mean_terminated_length": 187.98792114257813, "completions/min_length": 15.4, "completions/min_terminated_length": 82.6, "epoch": 0.24, "grad_norm": 0.001173818134702742, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 249507234.0, "reward": 0.9425259709358216, "reward_std": 0.10851940363645554, "rewards/accuracy_reward": 0.5373046875, "rewards/brier_reward": 0.7545185923576355, "rewards/confidence_uniqueness_reward": 0.91850825548172, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.07295873025432228, "rewards/frontier_coverage_1": 0.07295873025432228, "rewards/frontier_coverage_10": 0.07295873025432228, "rewards/frontier_coverage_15": 0.07312564663589001, "rewards/frontier_coverage_20": 0.06827028058469295, "rewards/frontier_coverage_25": 0.06042120754718781, "rewards/frontier_coverage_5": 0.07295873025432228, "signal/accuracy_reward/centered_abs_mean": 0.14693603515625, "signal/accuracy_reward/group_std_mean": 0.19192145466804506, "signal/accuracy_reward/group_zero_std_frac": 0.459375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.073468017578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.073468017578125, "signal/advantage_abs_mean": 0.0840824693441391, "signal/advantage_pre_scale_abs_mean": 0.0840824693441391, "signal/advantage_pre_scale_std": 0.12857705354690552, "signal/advantage_std": 0.12857705354690552, "signal/brier_reward/centered_abs_mean": 0.18759630620479584, "signal/brier_reward/group_std_mean": 0.2353689730167389, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018759630247950555, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018759630247950555, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.047961297631263736, "signal/confidence_uniqueness_reward/group_std_mean": 0.05927042812108994, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0047961299307644365, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0047961299307644365, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.2316052109003067, "signal/frontier_coverage_0/group_std_mean": 0.3017585575580597, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0033119544852524994, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033119544852524994, "signal/frontier_coverage_1/centered_abs_mean": 0.2316052109003067, "signal/frontier_coverage_1/group_std_mean": 0.3017585575580597, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0033119544852524994, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033119544852524994, "signal/frontier_coverage_10/centered_abs_mean": 0.2316052109003067, "signal/frontier_coverage_10/group_std_mean": 0.3017585575580597, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0033119544852524994, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0033119544852524994, "signal/frontier_coverage_15/centered_abs_mean": 0.2291814088821411, "signal/frontier_coverage_15/group_std_mean": 0.2985960841178894, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003277294151484966, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003277294151484966, "signal/frontier_coverage_20/centered_abs_mean": 0.21456428468227387, "signal/frontier_coverage_20/group_std_mean": 0.28007822036743163, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030682692769914864, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030682692769914864, "signal/frontier_coverage_25/centered_abs_mean": 0.18277060687541963, "signal/frontier_coverage_25/group_std_mean": 0.23978594839572906, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0026136196684092283, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026136196684092283, "signal/frontier_coverage_5/centered_abs_mean": 0.2316052109003067, "signal/frontier_coverage_5/group_std_mean": 0.3017585575580597, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0033119544852524994, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033119544852524994, "step": 75 }, { "calibration/aurc": 0.3081827301890542, "calibration/batch_distribution_entropy": 0.9624072780197552, "calibration/buffer_distribution_entropy": 0.9915481436618189, "calibration/confidence_entropy": 0.43016921259442914, "calibration/coverage@0%": 0.018369312622309196, "calibration/coverage@1%": 0.018369312622309196, "calibration/coverage@10%": 0.09579409246575342, "calibration/coverage@15%": 0.2114825403620352, "calibration/coverage@20%": 0.3134922333659491, "calibration/coverage@25%": 0.41742447407045014, "calibration/coverage@30%": 0.5405080418297457, "calibration/coverage@5%": 0.041806812622309196, "calibration/ece": 0.13412265396494313, "calibration/mean_confidence": 0.4988568931342644, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 672.0, "completions/max_terminated_length": 672.0, "completions/mean_length": 185.28642578125, "completions/mean_terminated_length": 185.46622619628906, "completions/min_length": 15.6, "completions/min_terminated_length": 78.0, "epoch": 0.256, "grad_norm": 0.0011070192558690906, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 266459383.0, "reward": 0.9314518451690674, "reward_std": 0.10366167277097701, "rewards/accuracy_reward": 0.50625, "rewards/brier_reward": 0.7604737877845764, "rewards/confidence_uniqueness_reward": 0.9207041382789611, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.11289391908794641, "rewards/frontier_coverage_1": 0.11289391908794641, "rewards/frontier_coverage_10": 0.11289391908794641, "rewards/frontier_coverage_15": 0.1116494283080101, "rewards/frontier_coverage_20": 0.10403509885072708, "rewards/frontier_coverage_25": 0.08422165811061859, "rewards/frontier_coverage_5": 0.11289391908794641, "signal/accuracy_reward/centered_abs_mean": 0.13895263671875, "signal/accuracy_reward/group_std_mean": 0.18133105635643004, "signal/accuracy_reward/group_zero_std_frac": 0.4875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.069476318359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.069476318359375, "signal/advantage_abs_mean": 0.07949463427066802, "signal/advantage_pre_scale_abs_mean": 0.07949463427066802, "signal/advantage_pre_scale_std": 0.12502660751342773, "signal/advantage_std": 0.12502660751342773, "signal/brier_reward/centered_abs_mean": 0.18379815220832824, "signal/brier_reward/group_std_mean": 0.23144225180149078, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01837981529533863, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01837981529533863, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04970728680491447, "signal/confidence_uniqueness_reward/group_std_mean": 0.061461112648248675, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004970728792250157, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004970728792250157, "signal/format_reward/centered_abs_mean": 0.002081298828125, "signal/format_reward/group_std_mean": 0.006076698703691363, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010406494140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010406494140625, "signal/frontier_coverage_0/centered_abs_mean": 0.23146614134311677, "signal/frontier_coverage_0/group_std_mean": 0.298116660118103, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003309965645894408, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003309965645894408, "signal/frontier_coverage_1/centered_abs_mean": 0.23146614134311677, "signal/frontier_coverage_1/group_std_mean": 0.298116660118103, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003309965645894408, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003309965645894408, "signal/frontier_coverage_10/centered_abs_mean": 0.23146614134311677, "signal/frontier_coverage_10/group_std_mean": 0.298116660118103, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003309965645894408, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003309965645894408, "signal/frontier_coverage_15/centered_abs_mean": 0.22649968266487122, "signal/frontier_coverage_15/group_std_mean": 0.29187222719192507, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003238945361226797, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003238945361226797, "signal/frontier_coverage_20/centered_abs_mean": 0.21151622533798217, "signal/frontier_coverage_20/group_std_mean": 0.2728983283042908, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0030246819369494915, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030246819369494915, "signal/frontier_coverage_25/centered_abs_mean": 0.16591953337192536, "signal/frontier_coverage_25/group_std_mean": 0.21466890573501587, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002372649358585477, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002372649358585477, "signal/frontier_coverage_5/centered_abs_mean": 0.23146614134311677, "signal/frontier_coverage_5/group_std_mean": 0.298116660118103, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003309965645894408, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003309965645894408, "step": 80 }, { "calibration/aurc": 0.38003463125477943, "calibration/batch_distribution_entropy": 0.9735982384707729, "calibration/buffer_distribution_entropy": 0.9822089271000805, "calibration/confidence_entropy": 0.44327317264226096, "calibration/coverage@0%": 0.010548403864970645, "calibration/coverage@1%": 0.010548403864970645, "calibration/coverage@10%": 0.09376911081213307, "calibration/coverage@15%": 0.12697223581213307, "calibration/coverage@20%": 0.14298786081213308, "calibration/coverage@25%": 0.21884555406066536, "calibration/coverage@30%": 0.2950342465753425, "calibration/coverage@5%": 0.024610903864970647, "calibration/ece": 0.16382442094629304, "calibration/mean_confidence": 0.46472179519412593, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005859375, "completions/max_length": 708.2, "completions/max_terminated_length": 708.2, "completions/mean_length": 192.935546875, "completions/mean_terminated_length": 193.04912719726562, "completions/min_length": 15.4, "completions/min_terminated_length": 80.2, "epoch": 0.272, "grad_norm": 0.0011620813747867942, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 283400739.0, "reward": 0.9291788816452027, "reward_std": 0.09849026650190354, "rewards/accuracy_reward": 0.49697265625, "rewards/brier_reward": 0.7616158962249756, "rewards/confidence_uniqueness_reward": 0.9286294102668762, "rewards/format_reward": 0.9994140625, "rewards/frontier_coverage_0": 0.12657882794737815, "rewards/frontier_coverage_1": 0.12657882794737815, "rewards/frontier_coverage_10": 0.1257509134709835, "rewards/frontier_coverage_15": 0.1237585011869669, "rewards/frontier_coverage_20": 0.11603162102401257, "rewards/frontier_coverage_25": 0.09115448929369449, "rewards/frontier_coverage_5": 0.12657882794737815, "signal/accuracy_reward/centered_abs_mean": 0.133734130859375, "signal/accuracy_reward/group_std_mean": 0.17143784165382386, "signal/accuracy_reward/group_zero_std_frac": 0.53125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0668670654296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0668670654296875, "signal/advantage_abs_mean": 0.07669400870800018, "signal/advantage_pre_scale_abs_mean": 0.07669400870800018, "signal/advantage_pre_scale_std": 0.11901784390211105, "signal/advantage_std": 0.11901784390211105, "signal/brier_reward/centered_abs_mean": 0.18369872272014617, "signal/brier_reward/group_std_mean": 0.23071206510066986, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018369871750473978, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018369871750473978, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04494693949818611, "signal/confidence_uniqueness_reward/group_std_mean": 0.05543198511004448, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004494693968445063, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004494693968445063, "signal/format_reward/centered_abs_mean": 0.00113525390625, "signal/format_reward/group_std_mean": 0.0033145629335194827, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000567626953125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000567626953125, "signal/frontier_coverage_0/centered_abs_mean": 0.24683951139450072, "signal/frontier_coverage_0/group_std_mean": 0.3129281342029572, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035298048984259366, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035298048984259366, "signal/frontier_coverage_1/centered_abs_mean": 0.24683951139450072, "signal/frontier_coverage_1/group_std_mean": 0.3129281342029572, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035298048984259366, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035298048984259366, "signal/frontier_coverage_10/centered_abs_mean": 0.24551962316036224, "signal/frontier_coverage_10/group_std_mean": 0.3112900614738464, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035109306219965218, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0035109306219965218, "signal/frontier_coverage_15/centered_abs_mean": 0.24138884544372557, "signal/frontier_coverage_15/group_std_mean": 0.30613497495651243, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0034518604166805743, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0034518604166805743, "signal/frontier_coverage_20/centered_abs_mean": 0.2232038915157318, "signal/frontier_coverage_20/group_std_mean": 0.2835902810096741, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0031918155495077372, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0031918155495077372, "signal/frontier_coverage_25/centered_abs_mean": 0.17182688117027284, "signal/frontier_coverage_25/group_std_mean": 0.21931109428405762, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0024571243207901715, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024571243207901715, "signal/frontier_coverage_5/centered_abs_mean": 0.24683951139450072, "signal/frontier_coverage_5/group_std_mean": 0.3129281342029572, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035298048984259366, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035298048984259366, "step": 85 }, { "calibration/aurc": 0.33664580545577377, "calibration/batch_distribution_entropy": 0.9753172988887358, "calibration/buffer_distribution_entropy": 0.9797373831788441, "calibration/confidence_entropy": 0.4395737894823409, "calibration/coverage@0%": 0.004303754892367906, "calibration/coverage@1%": 0.004303754892367906, "calibration/coverage@10%": 0.046573813600782776, "calibration/coverage@15%": 0.105613227739726, "calibration/coverage@20%": 0.20176201687866926, "calibration/coverage@25%": 0.24827773361056754, "calibration/coverage@30%": 0.36471761863992175, "calibration/coverage@5%": 0.020742111056751465, "calibration/ece": 0.13120126033724472, "calibration/mean_confidence": 0.48288364484083246, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 612.4, "completions/max_terminated_length": 612.4, "completions/mean_length": 186.45693359375, "completions/mean_terminated_length": 186.6018280029297, "completions/min_length": 13.4, "completions/min_terminated_length": 81.0, "epoch": 0.288, "grad_norm": 0.0010369827505201101, "learning_rate": 1e-06, "loss": -0.0003, "num_tokens": 300268234.0, "reward": 0.9325536012649536, "reward_std": 0.09712951034307479, "rewards/accuracy_reward": 0.50712890625, "rewards/brier_reward": 0.7498034596443176, "rewards/confidence_uniqueness_reward": 0.9337062835693359, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.11477768123149872, "rewards/frontier_coverage_1": 0.11477768123149872, "rewards/frontier_coverage_10": 0.11409241706132889, "rewards/frontier_coverage_15": 0.11274639219045639, "rewards/frontier_coverage_20": 0.11127715855836869, "rewards/frontier_coverage_25": 0.09562420099973679, "rewards/frontier_coverage_5": 0.11477768123149872, "signal/accuracy_reward/centered_abs_mean": 0.133929443359375, "signal/accuracy_reward/group_std_mean": 0.1750246465206146, "signal/accuracy_reward/group_zero_std_frac": 0.50625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0669647216796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0669647216796875, "signal/advantage_abs_mean": 0.07482761144638062, "signal/advantage_pre_scale_abs_mean": 0.07482761144638062, "signal/advantage_pre_scale_std": 0.11771952509880065, "signal/advantage_std": 0.11771952509880065, "signal/brier_reward/centered_abs_mean": 0.18906202912330627, "signal/brier_reward/group_std_mean": 0.23541579842567445, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01890620365738869, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01890620365738869, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04171065092086792, "signal/confidence_uniqueness_reward/group_std_mean": 0.053112023323774335, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.004171065147966146, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004171065147966146, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.00552427158690989, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.2531146973371506, "signal/frontier_coverage_0/group_std_mean": 0.3211081326007843, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003619540063664317, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003619540063664317, "signal/frontier_coverage_1/centered_abs_mean": 0.2531146973371506, "signal/frontier_coverage_1/group_std_mean": 0.3211081326007843, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003619540063664317, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003619540063664317, "signal/frontier_coverage_10/centered_abs_mean": 0.2518859803676605, "signal/frontier_coverage_10/group_std_mean": 0.31956766843795775, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0036019694991409777, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0036019694991409777, "signal/frontier_coverage_15/centered_abs_mean": 0.24905660152435302, "signal/frontier_coverage_15/group_std_mean": 0.3160142481327057, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003561509447172284, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003561509447172284, "signal/frontier_coverage_20/centered_abs_mean": 0.2441376507282257, "signal/frontier_coverage_20/group_std_mean": 0.30990801453590394, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034911684226244686, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034911684226244686, "signal/frontier_coverage_25/centered_abs_mean": 0.19897768795490264, "signal/frontier_coverage_25/group_std_mean": 0.2540053725242615, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028453809674829243, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028453809674829243, "signal/frontier_coverage_5/centered_abs_mean": 0.2531146973371506, "signal/frontier_coverage_5/group_std_mean": 0.3211081326007843, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003619540063664317, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003619540063664317, "step": 90 }, { "calibration/aurc": 0.3024819927679216, "calibration/batch_distribution_entropy": 0.9705055827465557, "calibration/buffer_distribution_entropy": 0.9820260193634669, "calibration/confidence_entropy": 0.43656159370883146, "calibration/coverage@0%": 0.001171875, "calibration/coverage@1%": 0.001171875, "calibration/coverage@10%": 0.070703125, "calibration/coverage@15%": 0.12421875, "calibration/coverage@20%": 0.28102067025440314, "calibration/coverage@25%": 0.39635824363992167, "calibration/coverage@30%": 0.5680765349804305, "calibration/coverage@5%": 0.001171875, "calibration/ece": 0.14709306829226074, "calibration/mean_confidence": 0.4818064076076837, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 758.6, "completions/max_terminated_length": 758.6, "completions/mean_length": 186.2091796875, "completions/mean_terminated_length": 186.3367950439453, "completions/min_length": 0.0, "completions/min_terminated_length": 83.0, "epoch": 0.304, "grad_norm": 0.0009753642370924354, "learning_rate": 1e-06, "loss": 0.0, "num_tokens": 317104968.0, "reward": 0.9321272730827331, "reward_std": 0.09373017102479934, "rewards/accuracy_reward": 0.505859375, "rewards/brier_reward": 0.7468447089195251, "rewards/confidence_uniqueness_reward": 0.9352753400802613, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.11779828369617462, "rewards/frontier_coverage_1": 0.11779828369617462, "rewards/frontier_coverage_10": 0.11779828369617462, "rewards/frontier_coverage_15": 0.11733001321554185, "rewards/frontier_coverage_20": 0.11368912011384964, "rewards/frontier_coverage_25": 0.10015616714954376, "rewards/frontier_coverage_5": 0.11779828369617462, "signal/accuracy_reward/centered_abs_mean": 0.128564453125, "signal/accuracy_reward/group_std_mean": 0.16599983870983123, "signal/accuracy_reward/group_zero_std_frac": 0.540625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0642822265625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0642822265625, "signal/advantage_abs_mean": 0.07163036465644837, "signal/advantage_pre_scale_abs_mean": 0.07163036465644837, "signal/advantage_pre_scale_std": 0.11185246258974076, "signal/advantage_std": 0.11185246258974076, "signal/brier_reward/centered_abs_mean": 0.1896709680557251, "signal/brier_reward/group_std_mean": 0.2372230350971222, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.018967097997665404, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018967097997665404, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03978384882211685, "signal/confidence_uniqueness_reward/group_std_mean": 0.05163332596421242, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003978384891524911, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003978384891524911, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271633476019, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.2604574590921402, "signal/frontier_coverage_0/group_std_mean": 0.32943272590637207, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0037245417013764383, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037245417013764383, "signal/frontier_coverage_1/centered_abs_mean": 0.2604574590921402, "signal/frontier_coverage_1/group_std_mean": 0.32943272590637207, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0037245417013764383, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037245417013764383, "signal/frontier_coverage_10/centered_abs_mean": 0.2604574590921402, "signal/frontier_coverage_10/group_std_mean": 0.32943272590637207, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0037245417013764383, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0037245417013764383, "signal/frontier_coverage_15/centered_abs_mean": 0.2594649285078049, "signal/frontier_coverage_15/group_std_mean": 0.32818403244018557, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0037103486247360706, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0037103486247360706, "signal/frontier_coverage_20/centered_abs_mean": 0.24546484351158143, "signal/frontier_coverage_20/group_std_mean": 0.31088122725486755, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00351014737971127, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00351014737971127, "signal/frontier_coverage_25/centered_abs_mean": 0.20276572704315185, "signal/frontier_coverage_25/group_std_mean": 0.2578410357236862, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0028995498549193146, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028995498549193146, "signal/frontier_coverage_5/centered_abs_mean": 0.2604574590921402, "signal/frontier_coverage_5/group_std_mean": 0.32943272590637207, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0037245417013764383, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037245417013764383, "step": 95 }, { "calibration/aurc": 0.2584915549059722, "calibration/batch_distribution_entropy": 0.9643361149508778, "calibration/buffer_distribution_entropy": 0.9796284846798822, "calibration/confidence_entropy": 0.4309896924466544, "calibration/coverage@0%": 0.01484375, "calibration/coverage@1%": 0.01484375, "calibration/coverage@10%": 0.14946997549019608, "calibration/coverage@15%": 0.3005147058823529, "calibration/coverage@20%": 0.44199754901960786, "calibration/coverage@25%": 0.5163174019607844, "calibration/coverage@30%": 0.653125, "calibration/coverage@5%": 0.046875, "calibration/ece": 0.16068788980220403, "calibration/mean_confidence": 0.5118728329765949, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 537.4, "completions/max_terminated_length": 537.4, "completions/mean_length": 185.09150390625, "completions/mean_terminated_length": 185.21853942871093, "completions/min_length": 32.8, "completions/min_terminated_length": 85.2, "epoch": 0.32, "grad_norm": 0.000842822715640068, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 334089009.0, "reward": 0.9449261665344239, "reward_std": 0.08346841335296631, "rewards/accuracy_reward": 0.5263671875, "rewards/brier_reward": 0.7642805576324463, "rewards/confidence_uniqueness_reward": 0.9409352779388428, "rewards/format_reward": 0.99931640625, "rewards/frontier_coverage_0": 0.11808980870991945, "rewards/frontier_coverage_1": 0.11808980870991945, "rewards/frontier_coverage_10": 0.11808980870991945, "rewards/frontier_coverage_15": 0.11808980870991945, "rewards/frontier_coverage_20": 0.11667817845009268, "rewards/frontier_coverage_25": 0.1014600930036977, "rewards/frontier_coverage_5": 0.11808980870991945, "signal/accuracy_reward/centered_abs_mean": 0.09598388671875, "signal/accuracy_reward/group_std_mean": 0.13364278078079223, "signal/accuracy_reward/group_zero_std_frac": 0.590625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047991943359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.047991943359375, "signal/advantage_abs_mean": 0.06248387470841408, "signal/advantage_pre_scale_abs_mean": 0.06248387470841408, "signal/advantage_pre_scale_std": 0.10356017798185349, "signal/advantage_std": 0.10356017798185349, "signal/brier_reward/centered_abs_mean": 0.1814287006855011, "signal/brier_reward/group_std_mean": 0.22994103133678437, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0181428711861372, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0181428711861372, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036495928466320035, "signal/confidence_uniqueness_reward/group_std_mean": 0.046190590411424634, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0036495930049568414, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036495930049568414, "signal/format_reward/centered_abs_mean": 0.001300048828125, "signal/format_reward/group_std_mean": 0.003194373194128275, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006500244140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006500244140625, "signal/frontier_coverage_0/centered_abs_mean": 0.22864521145820618, "signal/frontier_coverage_0/group_std_mean": 0.29235140681266786, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_1/centered_abs_mean": 0.22864521145820618, "signal/frontier_coverage_1/group_std_mean": 0.29235140681266786, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_10/centered_abs_mean": 0.22864521145820618, "signal/frontier_coverage_10/group_std_mean": 0.29235140681266786, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_15/centered_abs_mean": 0.22864521145820618, "signal/frontier_coverage_15/group_std_mean": 0.29235140681266786, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_20/centered_abs_mean": 0.21406340301036836, "signal/frontier_coverage_20/group_std_mean": 0.2742093026638031, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.003061106661334634, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003061106661334634, "signal/frontier_coverage_25/centered_abs_mean": 0.17238016426563263, "signal/frontier_coverage_25/group_std_mean": 0.22113934755325318, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00246503627859056, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00246503627859056, "signal/frontier_coverage_5/centered_abs_mean": 0.22864521145820618, "signal/frontier_coverage_5/group_std_mean": 0.29235140681266786, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0032696264795958998, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0032696264795958998, "step": 100 }, { "epoch": 0.32, "eval_calibration/aurc": 0.4978971802283725, "eval_calibration/batch_distribution_entropy": 0.8601270992336276, "eval_calibration/buffer_distribution_entropy": 0.9743978817320665, "eval_calibration/confidence_entropy": 0.383007351327093, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.046875, "eval_calibration/coverage@20%": 0.0546875, "eval_calibration/coverage@25%": 0.1640625, "eval_calibration/coverage@30%": 0.171875, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.2747890625, "eval_calibration/mean_confidence": 0.4455234375, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 316.25, "eval_completions/max_terminated_length": 316.25, "eval_completions/mean_length": 186.5537452697754, "eval_completions/mean_terminated_length": 186.5537452697754, "eval_completions/min_length": 94.75, "eval_completions/min_terminated_length": 94.75, "eval_loss": 0.0, "eval_num_tokens": 334089009.0, "eval_reward": 0.8891656398773193, "eval_reward_std": 0.21917415410280228, "eval_rewards/accuracy_reward": 0.41015625, "eval_rewards/brier_reward": 0.7693117260932922, "eval_rewards/confidence_uniqueness_reward": 0.882080078125, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.20451999828219414, "eval_rewards/frontier_coverage_1": 0.20451999828219414, "eval_rewards/frontier_coverage_10": 0.20451999828219414, "eval_rewards/frontier_coverage_15": 0.20451999828219414, "eval_rewards/frontier_coverage_20": 0.17699695751070976, "eval_rewards/frontier_coverage_25": 0.1254621297121048, "eval_rewards/frontier_coverage_5": 0.20451999828219414, "eval_runtime": 17.6698, "eval_samples_per_second": 28.297, "eval_signal/accuracy_reward/centered_abs_mean": 0.46484375, "eval_signal/accuracy_reward/group_std_mean": 0.4892386645078659, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.232421875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.232421875, "eval_signal/advantage_abs_mean": 0.2005120925605297, "eval_signal/advantage_pre_scale_abs_mean": 0.2005120925605297, "eval_signal/advantage_pre_scale_std": 0.2169787734746933, "eval_signal/advantage_std": 0.2169787734746933, "eval_signal/brier_reward/centered_abs_mean": 0.23788422346115112, "eval_signal/brier_reward/group_std_mean": 0.2953747808933258, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.023788423743098974, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.023788423743098974, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0579986572265625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06986325047910213, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0057998658157885075, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0057998658157885075, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.4016883596777916, "eval_signal/frontier_coverage_0/group_std_mean": 0.5005353167653084, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.4016883596777916, "eval_signal/frontier_coverage_1/group_std_mean": 0.5005353167653084, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.4016883596777916, "eval_signal/frontier_coverage_10/group_std_mean": 0.5005353167653084, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.4016883596777916, "eval_signal/frontier_coverage_15/group_std_mean": 0.5005353167653084, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.3575369715690613, "eval_signal/frontier_coverage_20/group_std_mean": 0.44863685965538025, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005112778628244996, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005112778628244996, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2680119276046753, "eval_signal/frontier_coverage_25/group_std_mean": 0.33913008868694305, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0038325704517774284, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038325704517774284, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.4016883596777916, "eval_signal/frontier_coverage_5/group_std_mean": 0.5005353167653084, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005744143738411367, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005744143738411367, "eval_steps_per_second": 0.226, "step": 100 }, { "calibration/aurc": 0.3131342794731158, "calibration/batch_distribution_entropy": 0.9569328620971553, "calibration/buffer_distribution_entropy": 0.9708789338570618, "calibration/confidence_entropy": 0.43368021237880383, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.05703125, "calibration/coverage@15%": 0.078125, "calibration/coverage@20%": 0.15157396648727986, "calibration/coverage@25%": 0.3711403803816047, "calibration/coverage@30%": 0.5071214530332682, "calibration/coverage@5%": 0.01640625, "calibration/ece": 0.15830718956356565, "calibration/mean_confidence": 0.46587580069748835, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 623.4, "completions/max_terminated_length": 623.4, "completions/mean_length": 188.03359375, "completions/mean_terminated_length": 188.19609375, "completions/min_length": 16.6, "completions/min_terminated_length": 84.6, "epoch": 0.336, "grad_norm": 0.0009528554510325193, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 350736905.0, "reward": 0.9469254612922668, "reward_std": 0.09120655208826065, "rewards/accuracy_reward": 0.53369140625, "rewards/brier_reward": 0.7606370687484741, "rewards/confidence_uniqueness_reward": 0.9452253580093384, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.10427642688155174, "rewards/frontier_coverage_1": 0.10427642688155174, "rewards/frontier_coverage_10": 0.10427642688155174, "rewards/frontier_coverage_15": 0.1033320739865303, "rewards/frontier_coverage_20": 0.09279508143663406, "rewards/frontier_coverage_25": 0.08138205781579018, "rewards/frontier_coverage_5": 0.10427642688155174, "signal/accuracy_reward/centered_abs_mean": 0.112249755859375, "signal/accuracy_reward/group_std_mean": 0.1554201140999794, "signal/accuracy_reward/group_zero_std_frac": 0.53125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0561248779296875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0561248779296875, "signal/advantage_abs_mean": 0.06810541898012161, "signal/advantage_pre_scale_abs_mean": 0.06810541898012161, "signal/advantage_pre_scale_std": 0.11220057159662247, "signal/advantage_std": 0.11220057159662247, "signal/brier_reward/centered_abs_mean": 0.1775924503803253, "signal/brier_reward/group_std_mean": 0.224722757935524, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01775924488902092, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01775924488902092, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03135449551045895, "signal/confidence_uniqueness_reward/group_std_mean": 0.04070526883006096, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0031354496255517005, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031354496255517005, "signal/format_reward/centered_abs_mean": 0.001690673828125, "signal/format_reward/group_std_mean": 0.00463553611189127, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625, "signal/frontier_coverage_0/centered_abs_mean": 0.22398524582386017, "signal/frontier_coverage_0/group_std_mean": 0.28823475241661073, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003202988859266043, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003202988859266043, "signal/frontier_coverage_1/centered_abs_mean": 0.22398524582386017, "signal/frontier_coverage_1/group_std_mean": 0.28823475241661073, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003202988859266043, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003202988859266043, "signal/frontier_coverage_10/centered_abs_mean": 0.22398524582386017, "signal/frontier_coverage_10/group_std_mean": 0.28823475241661073, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003202988859266043, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003202988859266043, "signal/frontier_coverage_15/centered_abs_mean": 0.21530932188034058, "signal/frontier_coverage_15/group_std_mean": 0.27726990580558775, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0030789232812821867, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030789232812821867, "signal/frontier_coverage_20/centered_abs_mean": 0.18621681928634642, "signal/frontier_coverage_20/group_std_mean": 0.24037640988826753, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0026629004627466203, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026629004627466203, "signal/frontier_coverage_25/centered_abs_mean": 0.14478266537189483, "signal/frontier_coverage_25/group_std_mean": 0.18697050213813782, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0020703921094536782, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020703921094536782, "signal/frontier_coverage_5/centered_abs_mean": 0.22398524582386017, "signal/frontier_coverage_5/group_std_mean": 0.28823475241661073, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003202988859266043, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003202988859266043, "step": 105 }, { "calibration/aurc": 0.33687739417794316, "calibration/batch_distribution_entropy": 0.9215418352994096, "calibration/buffer_distribution_entropy": 0.9673654878997088, "calibration/confidence_entropy": 0.3971975919538019, "calibration/coverage@0%": 0.019930283757338553, "calibration/coverage@1%": 0.019930283757338553, "calibration/coverage@10%": 0.08017062133072407, "calibration/coverage@15%": 0.2220523483365949, "calibration/coverage@20%": 0.33498425269080234, "calibration/coverage@25%": 0.39986240215264185, "calibration/coverage@30%": 0.45926109955968686, "calibration/coverage@5%": 0.043002385029354204, "calibration/ece": 0.1328964998265568, "calibration/mean_confidence": 0.4276185512309817, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 622.0, "completions/max_terminated_length": 622.0, "completions/mean_length": 192.4009765625, "completions/mean_terminated_length": 192.57146911621095, "completions/min_length": 0.0, "completions/min_terminated_length": 86.8, "epoch": 0.352, "grad_norm": 0.0010305993491783738, "learning_rate": 1e-06, "loss": -0.0, "num_tokens": 367967507.0, "reward": 0.9195515155792237, "reward_std": 0.09088908433914185, "rewards/accuracy_reward": 0.46865234375, "rewards/brier_reward": 0.7680434465408326, "rewards/confidence_uniqueness_reward": 0.9434323787689209, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.1578772708773613, "rewards/frontier_coverage_1": 0.1578772708773613, "rewards/frontier_coverage_10": 0.15736701637506484, "rewards/frontier_coverage_15": 0.1525440275669098, "rewards/frontier_coverage_20": 0.133266481757164, "rewards/frontier_coverage_25": 0.10179407596588134, "rewards/frontier_coverage_5": 0.1578772708773613, "signal/accuracy_reward/centered_abs_mean": 0.118914794921875, "signal/accuracy_reward/group_std_mean": 0.15495410561561584, "signal/accuracy_reward/group_zero_std_frac": 0.565625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0594573974609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0594573974609375, "signal/advantage_abs_mean": 0.0692843958735466, "signal/advantage_pre_scale_abs_mean": 0.0692843958735466, "signal/advantage_pre_scale_std": 0.11363352984189987, "signal/advantage_std": 0.11363352984189987, "signal/brier_reward/centered_abs_mean": 0.17312630116939545, "signal/brier_reward/group_std_mean": 0.21913830041885377, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017312630265951156, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017312630265951156, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03259267956018448, "signal/confidence_uniqueness_reward/group_std_mean": 0.04234115481376648, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0032592680305242538, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032592680305242538, "signal/format_reward/centered_abs_mean": 0.00186767578125, "signal/format_reward/group_std_mean": 0.00485165468417108, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000933837890625, "signal/frontier_coverage_0/centered_abs_mean": 0.22767443656921388, "signal/frontier_coverage_0/group_std_mean": 0.29212393164634703, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003255744371563196, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003255744371563196, "signal/frontier_coverage_1/centered_abs_mean": 0.22767443656921388, "signal/frontier_coverage_1/group_std_mean": 0.29212393164634703, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003255744371563196, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003255744371563196, "signal/frontier_coverage_10/centered_abs_mean": 0.22719871401786804, "signal/frontier_coverage_10/group_std_mean": 0.2915296196937561, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0032489415258169173, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0032489415258169173, "signal/frontier_coverage_15/centered_abs_mean": 0.2192291349172592, "signal/frontier_coverage_15/group_std_mean": 0.2817227363586426, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003134976560249925, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003134976560249925, "signal/frontier_coverage_20/centered_abs_mean": 0.1940439224243164, "signal/frontier_coverage_20/group_std_mean": 0.24981018900871277, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002774828253313899, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002774828253313899, "signal/frontier_coverage_25/centered_abs_mean": 0.14172750413417817, "signal/frontier_coverage_25/group_std_mean": 0.1828421801328659, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002026703301817179, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002026703301817179, "signal/frontier_coverage_5/centered_abs_mean": 0.22767443656921388, "signal/frontier_coverage_5/group_std_mean": 0.29212393164634703, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003255744371563196, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003255744371563196, "step": 110 }, { "calibration/aurc": 0.38035904158057876, "calibration/batch_distribution_entropy": 0.9363451585140791, "calibration/buffer_distribution_entropy": 0.9557963506573927, "calibration/confidence_entropy": 0.41220067846801395, "calibration/coverage@0%": 0.002734375, "calibration/coverage@1%": 0.002734375, "calibration/coverage@10%": 0.01015625, "calibration/coverage@15%": 0.115625, "calibration/coverage@20%": 0.233203125, "calibration/coverage@25%": 0.303125, "calibration/coverage@30%": 0.346875, "calibration/coverage@5%": 0.002734375, "calibration/ece": 0.14852497762388386, "calibration/mean_confidence": 0.5069563311666597, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 635.8, "completions/max_terminated_length": 635.8, "completions/mean_length": 195.91884765625, "completions/mean_terminated_length": 196.0728973388672, "completions/min_length": 0.0, "completions/min_terminated_length": 85.0, "epoch": 0.368, "grad_norm": 0.001282665878534317, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 385039188.0, "reward": 0.9295849800109863, "reward_std": 0.08723250329494477, "rewards/accuracy_reward": 0.4876953125, "rewards/brier_reward": 0.7701516270637512, "rewards/confidence_uniqueness_reward": 0.9463400363922119, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.15154111981391907, "rewards/frontier_coverage_1": 0.15154111981391907, "rewards/frontier_coverage_10": 0.15106455981731415, "rewards/frontier_coverage_15": 0.14973250329494475, "rewards/frontier_coverage_20": 0.14436569213867187, "rewards/frontier_coverage_25": 0.11632102131843566, "rewards/frontier_coverage_5": 0.15135019719600679, "signal/accuracy_reward/centered_abs_mean": 0.10582275390625, "signal/accuracy_reward/group_std_mean": 0.14048746675252916, "signal/accuracy_reward/group_zero_std_frac": 0.59375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.052911376953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.052911376953125, "signal/advantage_abs_mean": 0.06566806137561798, "signal/advantage_pre_scale_abs_mean": 0.06566806137561798, "signal/advantage_pre_scale_std": 0.11026655286550521, "signal/advantage_std": 0.11026655286550521, "signal/brier_reward/centered_abs_mean": 0.1696721464395523, "signal/brier_reward/group_std_mean": 0.21770406663417816, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01696721464395523, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01696721464395523, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03083474263548851, "signal/confidence_uniqueness_reward/group_std_mean": 0.04060439914464951, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0030834743287414313, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030834743287414313, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844423562288, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_coverage_0/centered_abs_mean": 0.20986934006214142, "signal/frontier_coverage_0/group_std_mean": 0.2717552125453949, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00300113158300519, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00300113158300519, "signal/frontier_coverage_1/centered_abs_mean": 0.20986934006214142, "signal/frontier_coverage_1/group_std_mean": 0.2717552125453949, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00300113158300519, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00300113158300519, "signal/frontier_coverage_10/centered_abs_mean": 0.20876844227313995, "signal/frontier_coverage_10/group_std_mean": 0.270385617017746, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002985388785600662, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002985388785600662, "signal/frontier_coverage_15/centered_abs_mean": 0.2069230079650879, "signal/frontier_coverage_15/group_std_mean": 0.26806753873825073, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002958999015390873, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002958999015390873, "signal/frontier_coverage_20/centered_abs_mean": 0.19959359467029572, "signal/frontier_coverage_20/group_std_mean": 0.2588253915309906, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0028541883453726768, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028541883453726768, "signal/frontier_coverage_25/centered_abs_mean": 0.16081069707870482, "signal/frontier_coverage_25/group_std_mean": 0.20880703628063202, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0022995929699391126, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022995929699391126, "signal/frontier_coverage_5/centered_abs_mean": 0.20957548320293426, "signal/frontier_coverage_5/group_std_mean": 0.27138410210609437, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029969294089823963, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029969294089823963, "step": 115 }, { "calibration/aurc": 0.3342946944021966, "calibration/batch_distribution_entropy": 0.9207722984951203, "calibration/buffer_distribution_entropy": 0.9459169470031471, "calibration/confidence_entropy": 0.39206068388821536, "calibration/coverage@0%": 0.010560652842196094, "calibration/coverage@1%": 0.010560652842196094, "calibration/coverage@10%": 0.17484713156039572, "calibration/coverage@15%": 0.2123777088598086, "calibration/coverage@20%": 0.27841626683436826, "calibration/coverage@25%": 0.3401617719713546, "calibration/coverage@30%": 0.3886329070007088, "calibration/coverage@5%": 0.09822347696156987, "calibration/ece": 0.15159175750225923, "calibration/mean_confidence": 0.4581007046100899, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 785.4, "completions/max_terminated_length": 785.4, "completions/mean_length": 192.38779296875, "completions/mean_terminated_length": 192.59400329589843, "completions/min_length": 0.0, "completions/min_terminated_length": 88.4, "epoch": 0.384, "grad_norm": 0.0008075654623098671, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 401865751.0, "reward": 0.948432457447052, "reward_std": 0.08624267876148224, "rewards/accuracy_reward": 0.525, "rewards/brier_reward": 0.7833073854446411, "rewards/confidence_uniqueness_reward": 0.9480945825576782, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.13540833443403244, "rewards/frontier_coverage_1": 0.13540833443403244, "rewards/frontier_coverage_10": 0.13540833443403244, "rewards/frontier_coverage_15": 0.13456740379333496, "rewards/frontier_coverage_20": 0.1337219536304474, "rewards/frontier_coverage_25": 0.12220003008842469, "rewards/frontier_coverage_5": 0.13540833443403244, "signal/accuracy_reward/centered_abs_mean": 0.10770263671875, "signal/accuracy_reward/group_std_mean": 0.14466220736503602, "signal/accuracy_reward/group_zero_std_frac": 0.578125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.053851318359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.053851318359375, "signal/advantage_abs_mean": 0.06431769207119942, "signal/advantage_pre_scale_abs_mean": 0.06431769207119942, "signal/advantage_pre_scale_std": 0.10962674617767335, "signal/advantage_std": 0.10962674617767335, "signal/brier_reward/centered_abs_mean": 0.15990141928195953, "signal/brier_reward/group_std_mean": 0.2052460253238678, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01599014215171337, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01599014215171337, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02844097763299942, "signal/confidence_uniqueness_reward/group_std_mean": 0.03805322200059891, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0028440977446734907, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028440977446734907, "signal/format_reward/centered_abs_mean": 0.002044677734375, "signal/format_reward/group_std_mean": 0.005344869010150433, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010223388671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010223388671875, "signal/frontier_coverage_0/centered_abs_mean": 0.20318333506584169, "signal/frontier_coverage_0/group_std_mean": 0.262479567527771, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029055217746645212, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029055217746645212, "signal/frontier_coverage_1/centered_abs_mean": 0.20318333506584169, "signal/frontier_coverage_1/group_std_mean": 0.262479567527771, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029055217746645212, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029055217746645212, "signal/frontier_coverage_10/centered_abs_mean": 0.20318333506584169, "signal/frontier_coverage_10/group_std_mean": 0.262479567527771, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029055217746645212, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029055217746645212, "signal/frontier_coverage_15/centered_abs_mean": 0.2018148720264435, "signal/frontier_coverage_15/group_std_mean": 0.26077154874801634, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028859527315944432, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028859527315944432, "signal/frontier_coverage_20/centered_abs_mean": 0.19852499961853026, "signal/frontier_coverage_20/group_std_mean": 0.2565582513809204, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002838907530531287, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002838907530531287, "signal/frontier_coverage_25/centered_abs_mean": 0.1744537502527237, "signal/frontier_coverage_25/group_std_mean": 0.22632047533988953, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002494688564911485, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002494688564911485, "signal/frontier_coverage_5/centered_abs_mean": 0.20318333506584169, "signal/frontier_coverage_5/group_std_mean": 0.262479567527771, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029055217746645212, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029055217746645212, "step": 120 }, { "calibration/aurc": 0.44081405388229433, "calibration/batch_distribution_entropy": 0.9532446689776313, "calibration/buffer_distribution_entropy": 0.9478532000242842, "calibration/confidence_entropy": 0.42983333782589755, "calibration/coverage@0%": 0.005085031925343811, "calibration/coverage@1%": 0.005085031925343811, "calibration/coverage@10%": 0.01055378192534381, "calibration/coverage@15%": 0.013288156925343811, "calibration/coverage@20%": 0.033607563850687625, "calibration/coverage@25%": 0.06876381385068761, "calibration/coverage@30%": 0.1543106888506876, "calibration/coverage@5%": 0.005085031925343811, "calibration/ece": 0.20910516439126764, "calibration/mean_confidence": 0.5139065532992861, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 694.2, "completions/max_terminated_length": 694.2, "completions/mean_length": 192.7953125, "completions/mean_terminated_length": 192.92990112304688, "completions/min_length": 37.4, "completions/min_terminated_length": 93.2, "epoch": 0.4, "grad_norm": 0.0009383531287312508, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 418876423.0, "reward": 0.9311830878257752, "reward_std": 0.09381079226732254, "rewards/accuracy_reward": 0.4966796875, "rewards/brier_reward": 0.758125364780426, "rewards/confidence_uniqueness_reward": 0.9506930947303772, "rewards/format_reward": 0.99931640625, "rewards/frontier_coverage_0": 0.1270300518721342, "rewards/frontier_coverage_1": 0.1270300518721342, "rewards/frontier_coverage_10": 0.12690381668508052, "rewards/frontier_coverage_15": 0.12644278071820736, "rewards/frontier_coverage_20": 0.12001086957752705, "rewards/frontier_coverage_25": 0.10591514781117439, "rewards/frontier_coverage_5": 0.1270300518721342, "signal/accuracy_reward/centered_abs_mean": 0.1243408203125, "signal/accuracy_reward/group_std_mean": 0.16100817918777466, "signal/accuracy_reward/group_zero_std_frac": 0.553125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06217041015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06217041015625, "signal/advantage_abs_mean": 0.07223667949438095, "signal/advantage_pre_scale_abs_mean": 0.07223667949438095, "signal/advantage_pre_scale_std": 0.11894866228103637, "signal/advantage_std": 0.11894866228103637, "signal/brier_reward/centered_abs_mean": 0.17008011043071747, "signal/brier_reward/group_std_mean": 0.2154710829257965, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.017008011415600776, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017008011415600776, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02668723650276661, "signal/confidence_uniqueness_reward/group_std_mean": 0.03516421280801296, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002668723603710532, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002668723603710532, "signal/format_reward/centered_abs_mean": 0.001324462890625, "signal/format_reward/group_std_mean": 0.003866990189999342, "signal/format_reward/group_zero_std_frac": 0.978125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0006622314453125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006622314453125, "signal/frontier_coverage_0/centered_abs_mean": 0.20345945060253143, "signal/frontier_coverage_0/group_std_mean": 0.2629883736371994, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002909470163285732, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002909470163285732, "signal/frontier_coverage_1/centered_abs_mean": 0.20345945060253143, "signal/frontier_coverage_1/group_std_mean": 0.2629883736371994, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002909470163285732, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002909470163285732, "signal/frontier_coverage_10/centered_abs_mean": 0.20321880877017975, "signal/frontier_coverage_10/group_std_mean": 0.26268347799777986, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029060290195047855, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029060290195047855, "signal/frontier_coverage_15/centered_abs_mean": 0.2025788426399231, "signal/frontier_coverage_15/group_std_mean": 0.26185826659202577, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028968773782253265, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028968773782253265, "signal/frontier_coverage_20/centered_abs_mean": 0.1929622620344162, "signal/frontier_coverage_20/group_std_mean": 0.24999802708625793, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0027593603823333978, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027593603823333978, "signal/frontier_coverage_25/centered_abs_mean": 0.16784164309501648, "signal/frontier_coverage_25/group_std_mean": 0.2181039869785309, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002400135388597846, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002400135388597846, "signal/frontier_coverage_5/centered_abs_mean": 0.20345945060253143, "signal/frontier_coverage_5/group_std_mean": 0.2629883736371994, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002909470163285732, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002909470163285732, "step": 125 }, { "calibration/aurc": 0.34096016807284873, "calibration/batch_distribution_entropy": 0.9683422027307792, "calibration/buffer_distribution_entropy": 0.9644673694109379, "calibration/confidence_entropy": 0.45812805550221186, "calibration/coverage@0%": 0.0015655577299412914, "calibration/coverage@1%": 0.0015655577299412914, "calibration/coverage@10%": 0.0015655577299412914, "calibration/coverage@15%": 0.010958904109589041, "calibration/coverage@20%": 0.06301369863013698, "calibration/coverage@25%": 0.15577299412915852, "calibration/coverage@30%": 0.3874610139432485, "calibration/coverage@5%": 0.0015655577299412914, "calibration/ece": 0.12162173186513561, "calibration/mean_confidence": 0.5221142927131631, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 706.6, "completions/max_terminated_length": 706.6, "completions/mean_length": 193.1140625, "completions/mean_terminated_length": 193.30492248535157, "completions/min_length": 0.0, "completions/min_terminated_length": 89.2, "epoch": 0.416, "grad_norm": 0.000847649818751961, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 435735095.0, "reward": 0.9387712836265564, "reward_std": 0.09340896159410476, "rewards/accuracy_reward": 0.5076171875, "rewards/brier_reward": 0.7740776777267456, "rewards/confidence_uniqueness_reward": 0.9541897296905517, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.13038937300443648, "rewards/frontier_coverage_1": 0.13038937300443648, "rewards/frontier_coverage_10": 0.13038937300443648, "rewards/frontier_coverage_15": 0.12946571856737138, "rewards/frontier_coverage_20": 0.12540235221385956, "rewards/frontier_coverage_25": 0.10638794153928757, "rewards/frontier_coverage_5": 0.13038937300443648, "signal/accuracy_reward/centered_abs_mean": 0.1222900390625, "signal/accuracy_reward/group_std_mean": 0.1587248280644417, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.06114501953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06114501953125, "signal/advantage_abs_mean": 0.07159682959318162, "signal/advantage_pre_scale_abs_mean": 0.07159682959318162, "signal/advantage_pre_scale_std": 0.11725033968687057, "signal/advantage_std": 0.11725033968687057, "signal/brier_reward/centered_abs_mean": 0.1667701780796051, "signal/brier_reward/group_std_mean": 0.2120150715112686, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.016677017882466318, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016677017882466318, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02338048294186592, "signal/confidence_uniqueness_reward/group_std_mean": 0.031994204968214035, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023380483500659464, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023380483500659464, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271540343762, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.2134171187877655, "signal/frontier_coverage_0/group_std_mean": 0.27234098613262175, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.003051864681765437, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003051864681765437, "signal/frontier_coverage_1/centered_abs_mean": 0.2134171187877655, "signal/frontier_coverage_1/group_std_mean": 0.27234098613262175, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.003051864681765437, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003051864681765437, "signal/frontier_coverage_10/centered_abs_mean": 0.2134171187877655, "signal/frontier_coverage_10/group_std_mean": 0.27234098613262175, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.003051864681765437, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003051864681765437, "signal/frontier_coverage_15/centered_abs_mean": 0.21203236877918244, "signal/frontier_coverage_15/group_std_mean": 0.2705941587686539, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.003032062901183963, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003032062901183963, "signal/frontier_coverage_20/centered_abs_mean": 0.20489224493503572, "signal/frontier_coverage_20/group_std_mean": 0.26140871942043303, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002929958933964372, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002929958933964372, "signal/frontier_coverage_25/centered_abs_mean": 0.17466167509555816, "signal/frontier_coverage_25/group_std_mean": 0.2234892874956131, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.002497661951929331, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002497661951929331, "signal/frontier_coverage_5/centered_abs_mean": 0.2134171187877655, "signal/frontier_coverage_5/group_std_mean": 0.27234098613262175, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.003051864681765437, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003051864681765437, "step": 130 }, { "calibration/aurc": 0.28100625365213594, "calibration/batch_distribution_entropy": 0.9536777814014286, "calibration/buffer_distribution_entropy": 0.9760632185498187, "calibration/confidence_entropy": 0.42176649646530473, "calibration/coverage@0%": 0.003131115459882583, "calibration/coverage@1%": 0.003131115459882583, "calibration/coverage@10%": 0.07123287671232877, "calibration/coverage@15%": 0.18718698438279419, "calibration/coverage@20%": 0.3110179962395917, "calibration/coverage@25%": 0.39481524116495914, "calibration/coverage@30%": 0.586499912464986, "calibration/coverage@5%": 0.017221135029354205, "calibration/ece": 0.11924453470120652, "calibration/mean_confidence": 0.5154573468848538, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 614.0, "completions/max_terminated_length": 614.0, "completions/mean_length": 190.61611328125, "completions/mean_terminated_length": 190.8771759033203, "completions/min_length": 0.0, "completions/min_terminated_length": 91.8, "epoch": 0.432, "grad_norm": 0.0009714950574561954, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 452701340.0, "reward": 0.9574862957000733, "reward_std": 0.08374630659818649, "rewards/accuracy_reward": 0.5421875, "rewards/brier_reward": 0.7939852356910706, "rewards/confidence_uniqueness_reward": 0.9491989016532898, "rewards/format_reward": 0.9986328125, "rewards/frontier_coverage_0": 0.13479797691106796, "rewards/frontier_coverage_1": 0.13479797691106796, "rewards/frontier_coverage_10": 0.13445091098546982, "rewards/frontier_coverage_15": 0.13207549303770066, "rewards/frontier_coverage_20": 0.1242610327899456, "rewards/frontier_coverage_25": 0.09696736782789231, "rewards/frontier_coverage_5": 0.13479797691106796, "signal/accuracy_reward/centered_abs_mean": 0.11279296875, "signal/accuracy_reward/group_std_mean": 0.14243824034929276, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.056396484375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.056396484375, "signal/advantage_abs_mean": 0.06408916339278221, "signal/advantage_pre_scale_abs_mean": 0.06408916339278221, "signal/advantage_pre_scale_std": 0.1099954828619957, "signal/advantage_std": 0.1099954828619957, "signal/brier_reward/centered_abs_mean": 0.14954062998294831, "signal/brier_reward/group_std_mean": 0.1921384632587433, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014954063296318054, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014954063296318054, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02739506885409355, "signal/confidence_uniqueness_reward/group_std_mean": 0.03796095065772533, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00273950700648129, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00273950700648129, "signal/format_reward/centered_abs_mean": 0.00264892578125, "signal/format_reward/group_std_mean": 0.007733980100601912, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001324462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001324462890625, "signal/frontier_coverage_0/centered_abs_mean": 0.20519976019859315, "signal/frontier_coverage_0/group_std_mean": 0.26305282711982725, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029343565460294486, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029343565460294486, "signal/frontier_coverage_1/centered_abs_mean": 0.20519976019859315, "signal/frontier_coverage_1/group_std_mean": 0.26305282711982725, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029343565460294486, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029343565460294486, "signal/frontier_coverage_10/centered_abs_mean": 0.2047807455062866, "signal/frontier_coverage_10/group_std_mean": 0.2625007629394531, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002928364695981145, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002928364695981145, "signal/frontier_coverage_15/centered_abs_mean": 0.19843302965164183, "signal/frontier_coverage_15/group_std_mean": 0.25481436848640443, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002837592316791415, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002837592316791415, "signal/frontier_coverage_20/centered_abs_mean": 0.17869805097579955, "signal/frontier_coverage_20/group_std_mean": 0.2304030865430832, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0025553821586072447, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025553821586072447, "signal/frontier_coverage_25/centered_abs_mean": 0.13113367408514023, "signal/frontier_coverage_25/group_std_mean": 0.16951973736286163, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0018752114614471794, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018752114614471794, "signal/frontier_coverage_5/centered_abs_mean": 0.20519976019859315, "signal/frontier_coverage_5/group_std_mean": 0.26305282711982725, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029343565460294486, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029343565460294486, "step": 135 }, { "calibration/aurc": 0.2959369188581053, "calibration/batch_distribution_entropy": 0.9544817324016235, "calibration/buffer_distribution_entropy": 0.9683931262060484, "calibration/confidence_entropy": 0.4561862154425681, "calibration/coverage@0%": 0.008605216487279844, "calibration/coverage@1%": 0.008605216487279844, "calibration/coverage@10%": 0.09666401663405087, "calibration/coverage@15%": 0.14555864726027395, "calibration/coverage@20%": 0.22690878791585128, "calibration/coverage@25%": 0.35163129892367906, "calibration/coverage@30%": 0.46030837206457925, "calibration/coverage@5%": 0.06418251590019569, "calibration/ece": 0.13582363568984318, "calibration/mean_confidence": 0.561269420046022, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 506.6, "completions/max_terminated_length": 506.6, "completions/mean_length": 195.76884765625, "completions/mean_terminated_length": 195.94118957519532, "completions/min_length": 16.4, "completions/min_terminated_length": 83.2, "epoch": 0.448, "grad_norm": 0.0007859326433390379, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 469658813.0, "reward": 0.9418084502220154, "reward_std": 0.083807834982872, "rewards/accuracy_reward": 0.51064453125, "rewards/brier_reward": 0.7846155047416687, "rewards/confidence_uniqueness_reward": 0.9552451729774475, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.14308023303747178, "rewards/frontier_coverage_1": 0.14308023303747178, "rewards/frontier_coverage_10": 0.14308023303747178, "rewards/frontier_coverage_15": 0.1384931057691574, "rewards/frontier_coverage_20": 0.11155976802110672, "rewards/frontier_coverage_25": 0.0824906125664711, "rewards/frontier_coverage_5": 0.14308023303747178, "signal/accuracy_reward/centered_abs_mean": 0.107501220703125, "signal/accuracy_reward/group_std_mean": 0.14100735783576965, "signal/accuracy_reward/group_zero_std_frac": 0.59375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0537506103515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0537506103515625, "signal/advantage_abs_mean": 0.06401537805795669, "signal/advantage_pre_scale_abs_mean": 0.06401537805795669, "signal/advantage_pre_scale_std": 0.10888355821371079, "signal/advantage_std": 0.10888355821371079, "signal/brier_reward/centered_abs_mean": 0.15687717795372008, "signal/brier_reward/group_std_mean": 0.19920064210891725, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015687718242406844, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015687718242406844, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02206093668937683, "signal/confidence_uniqueness_reward/group_std_mean": 0.02995873913168907, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022060936549678444, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022060936549678444, "signal/format_reward/centered_abs_mean": 0.001690673828125, "signal/format_reward/group_std_mean": 0.004635536065325141, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625, "signal/frontier_coverage_0/centered_abs_mean": 0.20669485926628112, "signal/frontier_coverage_0/group_std_mean": 0.26254424154758454, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002955736452713609, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002955736452713609, "signal/frontier_coverage_1/centered_abs_mean": 0.20669485926628112, "signal/frontier_coverage_1/group_std_mean": 0.26254424154758454, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002955736452713609, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002955736452713609, "signal/frontier_coverage_10/centered_abs_mean": 0.20669485926628112, "signal/frontier_coverage_10/group_std_mean": 0.26254424154758454, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002955736452713609, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002955736452713609, "signal/frontier_coverage_15/centered_abs_mean": 0.19869714379310607, "signal/frontier_coverage_15/group_std_mean": 0.2524612307548523, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0028413692489266396, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028413692489266396, "signal/frontier_coverage_20/centered_abs_mean": 0.15222130417823793, "signal/frontier_coverage_20/group_std_mean": 0.19457741677761078, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0021767647005617617, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021767647005617617, "signal/frontier_coverage_25/centered_abs_mean": 0.10508765280246735, "signal/frontier_coverage_25/group_std_mean": 0.13414179980754853, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001502753491513431, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001502753491513431, "signal/frontier_coverage_5/centered_abs_mean": 0.20669485926628112, "signal/frontier_coverage_5/group_std_mean": 0.26254424154758454, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002955736452713609, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002955736452713609, "step": 140 }, { "calibration/aurc": 0.41172691860962296, "calibration/batch_distribution_entropy": 0.9676244227784199, "calibration/buffer_distribution_entropy": 0.9721633945621548, "calibration/confidence_entropy": 0.46077859438207697, "calibration/coverage@0%": 0.0023452788649706456, "calibration/coverage@1%": 0.0023452788649706456, "calibration/coverage@10%": 0.0023452788649706456, "calibration/coverage@15%": 0.005085004892367906, "calibration/coverage@20%": 0.11152687744618395, "calibration/coverage@25%": 0.15692652274951074, "calibration/coverage@30%": 0.23550941780821918, "calibration/coverage@5%": 0.0023452788649706456, "calibration/ece": 0.14891257793805987, "calibration/mean_confidence": 0.4762559844498826, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00146484375, "completions/max_length": 770.0, "completions/max_terminated_length": 770.0, "completions/mean_length": 199.50302734375, "completions/mean_terminated_length": 199.79557189941406, "completions/min_length": 0.0, "completions/min_terminated_length": 91.0, "epoch": 0.464, "grad_norm": 0.0010671120835468173, "learning_rate": 1e-06, "loss": -0.0014, "num_tokens": 486872540.0, "reward": 0.9163182258605957, "reward_std": 0.08222974836826324, "rewards/accuracy_reward": 0.4634765625, "rewards/brier_reward": 0.7600107669830323, "rewards/confidence_uniqueness_reward": 0.9506341218948364, "rewards/format_reward": 0.9984375, "rewards/frontier_coverage_0": 0.15062947571277618, "rewards/frontier_coverage_1": 0.15062947571277618, "rewards/frontier_coverage_10": 0.15062947571277618, "rewards/frontier_coverage_15": 0.14993023201823236, "rewards/frontier_coverage_20": 0.1425690233707428, "rewards/frontier_coverage_25": 0.1047527477145195, "rewards/frontier_coverage_5": 0.15062947571277618, "signal/accuracy_reward/centered_abs_mean": 0.08927001953125, "signal/accuracy_reward/group_std_mean": 0.12481658458709717, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.044635009765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.044635009765625, "signal/advantage_abs_mean": 0.05983325392007828, "signal/advantage_pre_scale_abs_mean": 0.05983325392007828, "signal/advantage_pre_scale_std": 0.10508770495653152, "signal/advantage_std": 0.10508770495653152, "signal/brier_reward/centered_abs_mean": 0.15510054528713227, "signal/brier_reward/group_std_mean": 0.19951906502246858, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.015510055236518383, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015510055236518383, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025563039630651475, "signal/confidence_uniqueness_reward/group_std_mean": 0.035719023644924165, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025563039351254703, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025563039351254703, "signal/format_reward/centered_abs_mean": 0.002978515625, "signal/format_reward/group_std_mean": 0.007493600901216269, "signal/format_reward/group_zero_std_frac": 0.9625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014892578125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014892578125, "signal/frontier_coverage_0/centered_abs_mean": 0.19262197911739348, "signal/frontier_coverage_0/group_std_mean": 0.24983831644058227, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002754494268447161, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002754494268447161, "signal/frontier_coverage_1/centered_abs_mean": 0.19262197911739348, "signal/frontier_coverage_1/group_std_mean": 0.24983831644058227, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002754494268447161, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002754494268447161, "signal/frontier_coverage_10/centered_abs_mean": 0.19262197911739348, "signal/frontier_coverage_10/group_std_mean": 0.24983831644058227, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002754494268447161, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002754494268447161, "signal/frontier_coverage_15/centered_abs_mean": 0.18970014452934264, "signal/frontier_coverage_15/group_std_mean": 0.24627826511859893, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002712712064385414, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002712712064385414, "signal/frontier_coverage_20/centered_abs_mean": 0.17047151327133178, "signal/frontier_coverage_20/group_std_mean": 0.22192566394805907, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024377426598221064, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024377426598221064, "signal/frontier_coverage_25/centered_abs_mean": 0.11942090839147568, "signal/frontier_coverage_25/group_std_mean": 0.1555239737033844, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0017077189404517412, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017077189404517412, "signal/frontier_coverage_5/centered_abs_mean": 0.19262197911739348, "signal/frontier_coverage_5/group_std_mean": 0.24983831644058227, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002754494268447161, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002754494268447161, "step": 145 }, { "calibration/aurc": 0.2922078350827946, "calibration/batch_distribution_entropy": 0.9414541980973572, "calibration/buffer_distribution_entropy": 0.9761675690904742, "calibration/confidence_entropy": 0.43942815951524417, "calibration/coverage@0%": 0.00664597602739726, "calibration/coverage@1%": 0.00664597602739726, "calibration/coverage@10%": 0.03599483243639921, "calibration/coverage@15%": 0.21574578033268105, "calibration/coverage@20%": 0.29977831457925636, "calibration/coverage@25%": 0.4435520119863014, "calibration/coverage@30%": 0.5006207191780823, "calibration/coverage@5%": 0.01603932240704501, "calibration/ece": 0.16072039812978933, "calibration/mean_confidence": 0.4906872279118442, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00146484375, "completions/max_length": 701.2, "completions/max_terminated_length": 701.2, "completions/mean_length": 198.86162109375, "completions/mean_terminated_length": 199.15369873046876, "completions/min_length": 0.0, "completions/min_terminated_length": 93.4, "epoch": 0.48, "grad_norm": 0.0008275453001260757, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 503956915.0, "reward": 0.945784592628479, "reward_std": 0.08588873744010925, "rewards/accuracy_reward": 0.5220703125, "rewards/brier_reward": 0.7770702838897705, "rewards/confidence_uniqueness_reward": 0.9446855783462524, "rewards/format_reward": 0.99853515625, "rewards/frontier_coverage_0": 0.1349175065755844, "rewards/frontier_coverage_1": 0.1349175065755844, "rewards/frontier_coverage_10": 0.1349175065755844, "rewards/frontier_coverage_15": 0.1349175065755844, "rewards/frontier_coverage_20": 0.13396640568971635, "rewards/frontier_coverage_25": 0.12195459455251693, "rewards/frontier_coverage_5": 0.1349175065755844, "signal/accuracy_reward/centered_abs_mean": 0.1176513671875, "signal/accuracy_reward/group_std_mean": 0.1545749545097351, "signal/accuracy_reward/group_zero_std_frac": 0.559375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05882568359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05882568359375, "signal/advantage_abs_mean": 0.0640151172876358, "signal/advantage_pre_scale_abs_mean": 0.0640151172876358, "signal/advantage_pre_scale_std": 0.10988998115062713, "signal/advantage_std": 0.10988998115062713, "signal/brier_reward/centered_abs_mean": 0.15043422877788543, "signal/brier_reward/group_std_mean": 0.19341982007026673, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01504342332482338, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01504342332482338, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030228468775749206, "signal/confidence_uniqueness_reward/group_std_mean": 0.040879550576210025, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003022846952080727, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003022846952080727, "signal/format_reward/centered_abs_mean": 0.002825927734375, "signal/format_reward/group_std_mean": 0.007950099045410752, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014129638671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014129638671875, "signal/frontier_coverage_0/centered_abs_mean": 0.20872920453548433, "signal/frontier_coverage_0/group_std_mean": 0.26947686076164246, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_1/centered_abs_mean": 0.20872920453548433, "signal/frontier_coverage_1/group_std_mean": 0.26947686076164246, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_10/centered_abs_mean": 0.20872920453548433, "signal/frontier_coverage_10/group_std_mean": 0.26947686076164246, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_15/centered_abs_mean": 0.20872920453548433, "signal/frontier_coverage_15/group_std_mean": 0.26947686076164246, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_20/centered_abs_mean": 0.20555395781993865, "signal/frontier_coverage_20/group_std_mean": 0.2654797852039337, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0029394214041531088, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029394214041531088, "signal/frontier_coverage_25/centered_abs_mean": 0.18047940731048584, "signal/frontier_coverage_25/group_std_mean": 0.23378402590751649, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0025808554608374836, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0025808554608374836, "signal/frontier_coverage_5/centered_abs_mean": 0.20872920453548433, "signal/frontier_coverage_5/group_std_mean": 0.26947686076164246, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0029848274774849416, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029848274774849416, "step": 150 }, { "epoch": 0.48, "eval_calibration/aurc": 0.5075121591479552, "eval_calibration/batch_distribution_entropy": 0.8902093701786203, "eval_calibration/buffer_distribution_entropy": 0.9694623596330962, "eval_calibration/confidence_entropy": 0.4281672843683656, "eval_calibration/coverage@0%": 0.046875, "eval_calibration/coverage@1%": 0.046875, "eval_calibration/coverage@10%": 0.046875, "eval_calibration/coverage@15%": 0.09375, "eval_calibration/coverage@20%": 0.1171875, "eval_calibration/coverage@25%": 0.140625, "eval_calibration/coverage@30%": 0.1484375, "eval_calibration/coverage@5%": 0.046875, "eval_calibration/ece": 0.2268416569545887, "eval_calibration/mean_confidence": 0.42231040695458877, "eval_completions/clipped_ratio": 0.002155172413793094, "eval_completions/max_length": 458.5, "eval_completions/max_terminated_length": 458.5, "eval_completions/mean_length": 201.06047821044922, "eval_completions/mean_terminated_length": 201.51300811767578, "eval_completions/min_length": 89.25, "eval_completions/min_terminated_length": 120.75, "eval_loss": 0.0, "eval_num_tokens": 503956915.0, "eval_reward": 0.9004999846220016, "eval_reward_std": 0.21980078145861626, "eval_rewards/accuracy_reward": 0.427734375, "eval_rewards/brier_reward": 0.7848155051469803, "eval_rewards/confidence_uniqueness_reward": 0.8925072550773621, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_coverage_0": 0.20046903938055038, "eval_rewards/frontier_coverage_1": 0.20046903938055038, "eval_rewards/frontier_coverage_10": 0.20046903938055038, "eval_rewards/frontier_coverage_15": 0.20046903938055038, "eval_rewards/frontier_coverage_20": 0.19921957328915596, "eval_rewards/frontier_coverage_25": 0.18844087794423103, "eval_rewards/frontier_coverage_5": 0.20046903938055038, "eval_runtime": 28.9081, "eval_samples_per_second": 17.296, "eval_signal/accuracy_reward/centered_abs_mean": 0.4732666015625, "eval_signal/accuracy_reward/group_std_mean": 0.49377230554819107, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23663330078125, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23663330078125, "eval_signal/advantage_abs_mean": 0.20204394310712814, "eval_signal/advantage_pre_scale_abs_mean": 0.20204394310712814, "eval_signal/advantage_pre_scale_std": 0.2179497443139553, "eval_signal/advantage_std": 0.2179497443139553, "eval_signal/brier_reward/centered_abs_mean": 0.20935826003551483, "eval_signal/brier_reward/group_std_mean": 0.2633591406047344, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.020935827400535345, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.020935827400535345, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05121587961912155, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0662338575348258, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005121587892062962, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005121587892062962, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.39673639088869095, "eval_signal/frontier_coverage_0/group_std_mean": 0.48953280597925186, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.39673639088869095, "eval_signal/frontier_coverage_1/group_std_mean": 0.48953280597925186, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.39673639088869095, "eval_signal/frontier_coverage_10/group_std_mean": 0.48953280597925186, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.39673639088869095, "eval_signal/frontier_coverage_15/group_std_mean": 0.48953280597925186, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.39225903898477554, "eval_signal/frontier_coverage_20/group_std_mean": 0.4843933880329132, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.005609303945675492, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.005609303945675492, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.36136313527822495, "eval_signal/frontier_coverage_25/group_std_mean": 0.44835418462753296, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.005167493014596403, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.005167493014596403, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.39673639088869095, "eval_signal/frontier_coverage_5/group_std_mean": 0.48953280597925186, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005673330393619835, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005673330393619835, "eval_steps_per_second": 0.138, "step": 150 }, { "calibration/aurc": 0.38781416027199905, "calibration/batch_distribution_entropy": 0.9480112592873683, "calibration/buffer_distribution_entropy": 0.9613049183482241, "calibration/confidence_entropy": 0.43738759589147735, "calibration/coverage@0%": 0.01602709148727984, "calibration/coverage@1%": 0.01602709148727984, "calibration/coverage@10%": 0.11602862035225048, "calibration/coverage@15%": 0.13870015900195692, "calibration/coverage@20%": 0.1789895911275469, "calibration/coverage@25%": 0.22672386720099, "calibration/coverage@30%": 0.2748502971394037, "calibration/coverage@5%": 0.04571459148727984, "calibration/ece": 0.15164128651966327, "calibration/mean_confidence": 0.49700723151505083, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 620.6, "completions/max_terminated_length": 620.6, "completions/mean_length": 203.96142578125, "completions/mean_terminated_length": 204.2204620361328, "completions/min_length": 0.0, "completions/min_terminated_length": 98.8, "epoch": 0.496, "grad_norm": 0.000855308840982616, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 521353320.0, "reward": 0.9558730244636535, "reward_std": 0.08216302543878555, "rewards/accuracy_reward": 0.5431640625, "rewards/brier_reward": 0.7816872239112854, "rewards/confidence_uniqueness_reward": 0.9526494383811951, "rewards/format_reward": 0.99873046875, "rewards/frontier_coverage_0": 0.11778900772333145, "rewards/frontier_coverage_1": 0.11778900772333145, "rewards/frontier_coverage_10": 0.11775302439928055, "rewards/frontier_coverage_15": 0.11705803275108337, "rewards/frontier_coverage_20": 0.11374893933534622, "rewards/frontier_coverage_25": 0.10171589106321335, "rewards/frontier_coverage_5": 0.11778900772333145, "signal/accuracy_reward/centered_abs_mean": 0.09918212890625, "signal/accuracy_reward/group_std_mean": 0.13363586366176605, "signal/accuracy_reward/group_zero_std_frac": 0.60625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049591064453125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049591064453125, "signal/advantage_abs_mean": 0.06129216924309731, "signal/advantage_pre_scale_abs_mean": 0.06129216924309731, "signal/advantage_pre_scale_std": 0.1069572851061821, "signal/advantage_std": 0.1069572851061821, "signal/brier_reward/centered_abs_mean": 0.1463674783706665, "signal/brier_reward/group_std_mean": 0.19021452069282532, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014636747911572457, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014636747911572457, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02377520613372326, "signal/confidence_uniqueness_reward/group_std_mean": 0.032842491567134854, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023775207344442608, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023775207344442608, "signal/format_reward/centered_abs_mean": 0.002435302734375, "signal/format_reward/group_std_mean": 0.006508936267346143, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012176513671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012176513671875, "signal/frontier_coverage_0/centered_abs_mean": 0.18798189163208007, "signal/frontier_coverage_0/group_std_mean": 0.24676340222358703, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002688140980899334, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002688140980899334, "signal/frontier_coverage_1/centered_abs_mean": 0.18798189163208007, "signal/frontier_coverage_1/group_std_mean": 0.24676340222358703, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002688140980899334, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002688140980899334, "signal/frontier_coverage_10/centered_abs_mean": 0.18778745234012603, "signal/frontier_coverage_10/group_std_mean": 0.24652613401412965, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026853605639189483, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026853605639189483, "signal/frontier_coverage_15/centered_abs_mean": 0.18584738671779633, "signal/frontier_coverage_15/group_std_mean": 0.2440545976161957, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0026576175820082425, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026576175820082425, "signal/frontier_coverage_20/centered_abs_mean": 0.17214059233665466, "signal/frontier_coverage_20/group_std_mean": 0.22665069103240967, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0024616105481982233, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024616105481982233, "signal/frontier_coverage_25/centered_abs_mean": 0.13599956333637236, "signal/frontier_coverage_25/group_std_mean": 0.17969924956560135, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019447937374934554, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019447937374934554, "signal/frontier_coverage_5/centered_abs_mean": 0.18798189163208007, "signal/frontier_coverage_5/group_std_mean": 0.24676340222358703, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002688140980899334, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002688140980899334, "step": 155 }, { "calibration/aurc": 0.3185750943214118, "calibration/batch_distribution_entropy": 0.9608030156492443, "calibration/buffer_distribution_entropy": 0.9600624228150858, "calibration/confidence_entropy": 0.4684197472341688, "calibration/coverage@0%": 0.026301559694577835, "calibration/coverage@1%": 0.026301559694577835, "calibration/coverage@10%": 0.17225483884261378, "calibration/coverage@15%": 0.26192426369382427, "calibration/coverage@20%": 0.331605793010546, "calibration/coverage@25%": 0.43373122169347056, "calibration/coverage@30%": 0.4939625122309197, "calibration/coverage@5%": 0.04634085242542263, "calibration/ece": 0.14288779158153292, "calibration/mean_confidence": 0.5118868218103911, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 656.0, "completions/max_terminated_length": 656.0, "completions/mean_length": 202.19775390625, "completions/mean_terminated_length": 202.37572631835937, "completions/min_length": 38.6, "completions/min_terminated_length": 94.0, "epoch": 0.512, "grad_norm": 0.0008858853834681213, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 538569489.0, "reward": 0.9573903560638428, "reward_std": 0.08361846506595612, "rewards/accuracy_reward": 0.5423828125, "rewards/brier_reward": 0.7953786492347718, "rewards/confidence_uniqueness_reward": 0.9543616294860839, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.12758512943983077, "rewards/frontier_coverage_1": 0.12758512943983077, "rewards/frontier_coverage_10": 0.12758906930685043, "rewards/frontier_coverage_15": 0.12283221930265427, "rewards/frontier_coverage_20": 0.1063217431306839, "rewards/frontier_coverage_25": 0.07619328275322915, "rewards/frontier_coverage_5": 0.12758512943983077, "signal/accuracy_reward/centered_abs_mean": 0.1026611328125, "signal/accuracy_reward/group_std_mean": 0.14087689369916917, "signal/accuracy_reward/group_zero_std_frac": 0.58125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05133056640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05133056640625, "signal/advantage_abs_mean": 0.061477158963680265, "signal/advantage_pre_scale_abs_mean": 0.061477158963680265, "signal/advantage_pre_scale_std": 0.11054081916809082, "signal/advantage_std": 0.11054081916809082, "signal/brier_reward/centered_abs_mean": 0.13833895027637483, "signal/brier_reward/group_std_mean": 0.18004256784915923, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01383389551192522, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01383389551192522, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021861471980810166, "signal/confidence_uniqueness_reward/group_std_mean": 0.02985510379076004, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002186147286556661, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002186147286556661, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844470128417, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_coverage_0/centered_abs_mean": 0.17110781669616698, "signal/frontier_coverage_0/group_std_mean": 0.2254611998796463, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002446841774508357, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002446841774508357, "signal/frontier_coverage_1/centered_abs_mean": 0.17110781669616698, "signal/frontier_coverage_1/group_std_mean": 0.2254611998796463, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002446841774508357, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002446841774508357, "signal/frontier_coverage_10/centered_abs_mean": 0.17093735337257385, "signal/frontier_coverage_10/group_std_mean": 0.22522563338279725, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024444041773676872, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024444041773676872, "signal/frontier_coverage_15/centered_abs_mean": 0.16177424490451814, "signal/frontier_coverage_15/group_std_mean": 0.21304078996181489, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023133717477321625, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023133717477321625, "signal/frontier_coverage_20/centered_abs_mean": 0.12729571759700775, "signal/frontier_coverage_20/group_std_mean": 0.16839581429958345, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018203288316726684, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018203288316726684, "signal/frontier_coverage_25/centered_abs_mean": 0.0875883400440216, "signal/frontier_coverage_25/group_std_mean": 0.11519992053508758, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012525132391601802, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012525132391601802, "signal/frontier_coverage_5/centered_abs_mean": 0.17110781669616698, "signal/frontier_coverage_5/group_std_mean": 0.2254611998796463, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002446841774508357, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002446841774508357, "step": 160 }, { "calibration/aurc": 0.2115114488085529, "calibration/batch_distribution_entropy": 0.9620413869289213, "calibration/buffer_distribution_entropy": 0.9703211218417002, "calibration/confidence_entropy": 0.441188989082889, "calibration/coverage@0%": 0.03561414505870841, "calibration/coverage@1%": 0.03561414505870841, "calibration/coverage@10%": 0.23783252813111547, "calibration/coverage@15%": 0.46140380381604695, "calibration/coverage@20%": 0.5477915545499021, "calibration/coverage@25%": 0.6400348581213307, "calibration/coverage@30%": 0.7490780944227006, "calibration/coverage@5%": 0.12045162671232876, "calibration/ece": 0.12680648981791925, "calibration/mean_confidence": 0.5183541512637606, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 629.0, "completions/max_terminated_length": 629.0, "completions/mean_length": 202.233984375, "completions/mean_terminated_length": 202.43136291503907, "completions/min_length": 0.0, "completions/min_terminated_length": 95.8, "epoch": 0.528, "grad_norm": 0.0007963773678056896, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 555669901.0, "reward": 0.956901216506958, "reward_std": 0.08233840018510818, "rewards/accuracy_reward": 0.5412109375, "rewards/brier_reward": 0.7963914513587952, "rewards/confidence_uniqueness_reward": 0.9506345868110657, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.13646617978811265, "rewards/frontier_coverage_1": 0.13646617978811265, "rewards/frontier_coverage_10": 0.12993793487548827, "rewards/frontier_coverage_15": 0.11923633962869644, "rewards/frontier_coverage_20": 0.10301651507616043, "rewards/frontier_coverage_25": 0.08392253071069718, "rewards/frontier_coverage_5": 0.13580917268991471, "signal/accuracy_reward/centered_abs_mean": 0.1123046875, "signal/accuracy_reward/group_std_mean": 0.14442603141069413, "signal/accuracy_reward/group_zero_std_frac": 0.6, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.05615234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05615234375, "signal/advantage_abs_mean": 0.06305904388427734, "signal/advantage_pre_scale_abs_mean": 0.06305904388427734, "signal/advantage_pre_scale_std": 0.11096402853727341, "signal/advantage_std": 0.11096402853727341, "signal/brier_reward/centered_abs_mean": 0.14149291813373566, "signal/brier_reward/group_std_mean": 0.1800965338945389, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014149292558431625, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014149292558431625, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025094441324472427, "signal/confidence_uniqueness_reward/group_std_mean": 0.03405176103115082, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025094441138207913, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025094441138207913, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_std_mean": 0.005187963275238872, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_coverage_0/centered_abs_mean": 0.1912826269865036, "signal/frontier_coverage_0/group_std_mean": 0.24701529741287231, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002735341480001807, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002735341480001807, "signal/frontier_coverage_1/centered_abs_mean": 0.1912826269865036, "signal/frontier_coverage_1/group_std_mean": 0.24701529741287231, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002735341480001807, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002735341480001807, "signal/frontier_coverage_10/centered_abs_mean": 0.18224962651729584, "signal/frontier_coverage_10/group_std_mean": 0.2354464501142502, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0026061696466058494, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026061696466058494, "signal/frontier_coverage_15/centered_abs_mean": 0.16217685639858245, "signal/frontier_coverage_15/group_std_mean": 0.20974383354187012, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0023191290441900493, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023191290441900493, "signal/frontier_coverage_20/centered_abs_mean": 0.1335637390613556, "signal/frontier_coverage_20/group_std_mean": 0.17252120077610017, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019099614582955837, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019099614582955837, "signal/frontier_coverage_25/centered_abs_mean": 0.0971255749464035, "signal/frontier_coverage_25/group_std_mean": 0.12469170093536378, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001388895697891712, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001388895697891712, "signal/frontier_coverage_5/centered_abs_mean": 0.19096426665782928, "signal/frontier_coverage_5/group_std_mean": 0.24660421013832093, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002730788942426443, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002730788942426443, "step": 165 }, { "calibration/aurc": 0.2552697387528125, "calibration/batch_distribution_entropy": 0.9281181802421189, "calibration/buffer_distribution_entropy": 0.9703870048611121, "calibration/confidence_entropy": 0.42132426701062115, "calibration/coverage@0%": 0.004296875, "calibration/coverage@1%": 0.004296875, "calibration/coverage@10%": 0.0781387597847358, "calibration/coverage@15%": 0.18566918419765166, "calibration/coverage@20%": 0.390991927592955, "calibration/coverage@25%": 0.5626903436888454, "calibration/coverage@30%": 0.6912610078277887, "calibration/coverage@5%": 0.02578125, "calibration/ece": 0.09877190065298394, "calibration/mean_confidence": 0.5423266186064797, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 783.0, "completions/max_terminated_length": 783.0, "completions/mean_length": 205.52802734375, "completions/mean_terminated_length": 205.68857421875, "completions/min_length": 38.2, "completions/min_terminated_length": 92.8, "epoch": 0.544, "grad_norm": 0.0009505171910859644, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 572938092.0, "reward": 0.9629211902618409, "reward_std": 0.08926361948251724, "rewards/accuracy_reward": 0.56416015625, "rewards/brier_reward": 0.7771433115005493, "rewards/confidence_uniqueness_reward": 0.9545076489448547, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.0925481453537941, "rewards/frontier_coverage_1": 0.0925481453537941, "rewards/frontier_coverage_10": 0.08745588660240174, "rewards/frontier_coverage_15": 0.07613595128059387, "rewards/frontier_coverage_20": 0.06427086591720581, "rewards/frontier_coverage_25": 0.06200792193412781, "rewards/frontier_coverage_5": 0.0925481453537941, "signal/accuracy_reward/centered_abs_mean": 0.123321533203125, "signal/accuracy_reward/group_std_mean": 0.16205840706825256, "signal/accuracy_reward/group_zero_std_frac": 0.540625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0616607666015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0616607666015625, "signal/advantage_abs_mean": 0.06674028560519218, "signal/advantage_pre_scale_abs_mean": 0.06674028560519218, "signal/advantage_pre_scale_std": 0.11618766188621521, "signal/advantage_std": 0.11618766188621521, "signal/brier_reward/centered_abs_mean": 0.1495936095714569, "signal/brier_reward/group_std_mean": 0.19124016761779786, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.014959361404180527, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014959361404180527, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022427035868167876, "signal/confidence_uniqueness_reward/group_std_mean": 0.030667876452207567, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022427036659792066, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022427036659792066, "signal/format_reward/centered_abs_mean": 0.001690673828125, "signal/format_reward/group_std_mean": 0.004635535925626755, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008453369140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008453369140625, "signal/frontier_coverage_0/centered_abs_mean": 0.19233616888523103, "signal/frontier_coverage_0/group_std_mean": 0.2488338440656662, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002750407112762332, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002750407112762332, "signal/frontier_coverage_1/centered_abs_mean": 0.19233616888523103, "signal/frontier_coverage_1/group_std_mean": 0.2488338440656662, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002750407112762332, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002750407112762332, "signal/frontier_coverage_10/centered_abs_mean": 0.18275881111621856, "signal/frontier_coverage_10/group_std_mean": 0.23677020370960236, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002613450959324837, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002613450959324837, "signal/frontier_coverage_15/centered_abs_mean": 0.15216105580329894, "signal/frontier_coverage_15/group_std_mean": 0.1980179637670517, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002175903180614114, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002175903180614114, "signal/frontier_coverage_20/centered_abs_mean": 0.12181595414876938, "signal/frontier_coverage_20/group_std_mean": 0.15854274928569795, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0017419681418687106, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017419681418687106, "signal/frontier_coverage_25/centered_abs_mean": 0.09723487794399262, "signal/frontier_coverage_25/group_std_mean": 0.12516086548566818, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001390458783134818, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001390458783134818, "signal/frontier_coverage_5/centered_abs_mean": 0.19233616888523103, "signal/frontier_coverage_5/group_std_mean": 0.2488338440656662, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002750407112762332, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002750407112762332, "step": 170 }, { "calibration/aurc": 0.2696651710957544, "calibration/batch_distribution_entropy": 0.9535177713606812, "calibration/buffer_distribution_entropy": 0.9551885689032709, "calibration/confidence_entropy": 0.44521922125916424, "calibration/coverage@0%": 0.023067514677103718, "calibration/coverage@1%": 0.09767688967710372, "calibration/coverage@10%": 0.21059962084148726, "calibration/coverage@15%": 0.3393285225048924, "calibration/coverage@20%": 0.37964163405088064, "calibration/coverage@25%": 0.4587022994129158, "calibration/coverage@30%": 0.5232815557729942, "calibration/coverage@5%": 0.1785423801369863, "calibration/ece": 0.13447665490400346, "calibration/mean_confidence": 0.5198654114470358, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 656.6, "completions/max_terminated_length": 656.6, "completions/mean_length": 207.4677734375, "completions/mean_terminated_length": 207.65007629394532, "completions/min_length": 0.0, "completions/min_terminated_length": 98.2, "epoch": 0.56, "grad_norm": 0.0007309909560717642, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 589883970.0, "reward": 0.9505073428153992, "reward_std": 0.07694827765226364, "rewards/accuracy_reward": 0.52626953125, "rewards/brier_reward": 0.7975297689437866, "rewards/confidence_uniqueness_reward": 0.9535765409469604, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.142837318778038, "rewards/frontier_coverage_1": 0.142837318778038, "rewards/frontier_coverage_10": 0.14169300347566605, "rewards/frontier_coverage_15": 0.125778466463089, "rewards/frontier_coverage_20": 0.10275547206401825, "rewards/frontier_coverage_25": 0.0894699290394783, "rewards/frontier_coverage_5": 0.142837318778038, "signal/accuracy_reward/centered_abs_mean": 0.089459228515625, "signal/accuracy_reward/group_std_mean": 0.12330863773822784, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0447296142578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0447296142578125, "signal/advantage_abs_mean": 0.05690548121929169, "signal/advantage_pre_scale_abs_mean": 0.05690548121929169, "signal/advantage_pre_scale_std": 0.10379899442195892, "signal/advantage_std": 0.10379899442195892, "signal/brier_reward/centered_abs_mean": 0.13861277103424072, "signal/brier_reward/group_std_mean": 0.1788020610809326, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01386127769947052, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01386127769947052, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02220112681388855, "signal/confidence_uniqueness_reward/group_std_mean": 0.030533115193247796, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002220112690702081, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002220112690702081, "signal/format_reward/centered_abs_mean": 0.001702880859375, "signal/format_reward/group_std_mean": 0.004971844470128417, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0008514404296875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008514404296875, "signal/frontier_coverage_0/centered_abs_mean": 0.1741619348526001, "signal/frontier_coverage_0/group_std_mean": 0.22573706805706023, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024905156344175337, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024905156344175337, "signal/frontier_coverage_1/centered_abs_mean": 0.1741619348526001, "signal/frontier_coverage_1/group_std_mean": 0.22573706805706023, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024905156344175337, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024905156344175337, "signal/frontier_coverage_10/centered_abs_mean": 0.16985383331775666, "signal/frontier_coverage_10/group_std_mean": 0.22016933262348176, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002428909717127681, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002428909717127681, "signal/frontier_coverage_15/centered_abs_mean": 0.14225522726774215, "signal/frontier_coverage_15/group_std_mean": 0.18450681865215302, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002034249692223966, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002034249692223966, "signal/frontier_coverage_20/centered_abs_mean": 0.10933885574340821, "signal/frontier_coverage_20/group_std_mean": 0.14097019135951996, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015635456424206496, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015635456424206496, "signal/frontier_coverage_25/centered_abs_mean": 0.0914567232131958, "signal/frontier_coverage_25/group_std_mean": 0.11628818064928055, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013078311458230019, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013078311458230019, "signal/frontier_coverage_5/centered_abs_mean": 0.1741619348526001, "signal/frontier_coverage_5/group_std_mean": 0.22573706805706023, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024905156344175337, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024905156344175337, "step": 175 }, { "calibration/aurc": 0.3084092567902788, "calibration/batch_distribution_entropy": 0.9473711564829423, "calibration/buffer_distribution_entropy": 0.9565885810199408, "calibration/confidence_entropy": 0.44316332677270776, "calibration/coverage@0%": 0.007427990459882583, "calibration/coverage@1%": 0.007427990459882583, "calibration/coverage@10%": 0.0762116254892368, "calibration/coverage@15%": 0.21067453522504892, "calibration/coverage@20%": 0.2955357142857143, "calibration/coverage@25%": 0.3858060176125245, "calibration/coverage@30%": 0.4729436766144814, "calibration/coverage@5%": 0.007427990459882583, "calibration/ece": 0.0980685209106232, "calibration/mean_confidence": 0.5117813336144459, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001171875, "completions/max_length": 724.4, "completions/max_terminated_length": 724.4, "completions/mean_length": 212.187890625, "completions/mean_terminated_length": 212.44044494628906, "completions/min_length": 0.0, "completions/min_terminated_length": 104.6, "epoch": 0.576, "grad_norm": 0.000960467616096139, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 607243398.0, "reward": 0.9429786443710327, "reward_std": 0.0745589941740036, "rewards/accuracy_reward": 0.516015625, "rewards/brier_reward": 0.7830522418022156, "rewards/confidence_uniqueness_reward": 0.9498249053955078, "rewards/format_reward": 0.998828125, "rewards/frontier_coverage_0": 0.13849677741527558, "rewards/frontier_coverage_1": 0.13849677741527558, "rewards/frontier_coverage_10": 0.13743431270122528, "rewards/frontier_coverage_15": 0.12267089337110519, "rewards/frontier_coverage_20": 0.10137771666049958, "rewards/frontier_coverage_25": 0.08106651529669762, "rewards/frontier_coverage_5": 0.13843247890472413, "signal/accuracy_reward/centered_abs_mean": 0.087158203125, "signal/accuracy_reward/group_std_mean": 0.12412037551403046, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0435791015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0435791015625, "signal/advantage_abs_mean": 0.053151430934667586, "signal/advantage_pre_scale_abs_mean": 0.053151430934667586, "signal/advantage_pre_scale_std": 0.10061174780130386, "signal/advantage_std": 0.10061174780130386, "signal/brier_reward/centered_abs_mean": 0.13370026350021363, "signal/brier_reward/group_std_mean": 0.17423023283481598, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013370026648044587, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013370026648044587, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024790653213858604, "signal/confidence_uniqueness_reward/group_std_mean": 0.03437778577208519, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002479065442457795, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002479065442457795, "signal/format_reward/centered_abs_mean": 0.00225830078125, "signal/format_reward/group_std_mean": 0.0062928176019340755, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.001129150390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001129150390625, "signal/frontier_coverage_0/centered_abs_mean": 0.1691173493862152, "signal/frontier_coverage_0/group_std_mean": 0.22055851221084594, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0024183780420571565, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024183780420571565, "signal/frontier_coverage_1/centered_abs_mean": 0.1691173493862152, "signal/frontier_coverage_1/group_std_mean": 0.22055851221084594, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0024183780420571565, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024183780420571565, "signal/frontier_coverage_10/centered_abs_mean": 0.16804122924804688, "signal/frontier_coverage_10/group_std_mean": 0.21918695271015168, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002402989659458399, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002402989659458399, "signal/frontier_coverage_15/centered_abs_mean": 0.1508244901895523, "signal/frontier_coverage_15/group_std_mean": 0.19721821546554566, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002156790136359632, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002156790136359632, "signal/frontier_coverage_20/centered_abs_mean": 0.12613796889781953, "signal/frontier_coverage_20/group_std_mean": 0.165260449051857, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0018037728732451797, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018037728732451797, "signal/frontier_coverage_25/centered_abs_mean": 0.09953366965055466, "signal/frontier_coverage_25/group_std_mean": 0.12996020615100862, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014233314665034412, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014233314665034412, "signal/frontier_coverage_5/centered_abs_mean": 0.16890983879566193, "signal/frontier_coverage_5/group_std_mean": 0.22030186653137207, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002415410662069917, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002415410662069917, "step": 180 }, { "calibration/aurc": 0.2922215928302969, "calibration/batch_distribution_entropy": 0.9410299320000319, "calibration/buffer_distribution_entropy": 0.9611906168793969, "calibration/confidence_entropy": 0.42611274466780535, "calibration/coverage@0%": 0.005496432086614173, "calibration/coverage@1%": 0.005496432086614173, "calibration/coverage@10%": 0.22282136550486334, "calibration/coverage@15%": 0.3220304297900262, "calibration/coverage@20%": 0.4666775385796105, "calibration/coverage@25%": 0.562644392338561, "calibration/coverage@30%": 0.6155552124008647, "calibration/coverage@5%": 0.08693791492975142, "calibration/ece": 0.1429741727432932, "calibration/mean_confidence": 0.4983201035996781, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 702.0, "completions/max_terminated_length": 702.0, "completions/mean_length": 211.6880859375, "completions/mean_terminated_length": 211.91741027832032, "completions/min_length": 19.4, "completions/min_terminated_length": 95.0, "epoch": 0.592, "grad_norm": 0.0008625586051493883, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 624578796.0, "reward": 0.9486447334289551, "reward_std": 0.07493900805711746, "rewards/accuracy_reward": 0.52861328125, "rewards/brier_reward": 0.7828563928604126, "rewards/confidence_uniqueness_reward": 0.9443202376365661, "rewards/format_reward": 0.998828125, "rewards/frontier_coverage_0": 0.13428538143634797, "rewards/frontier_coverage_1": 0.13428538143634797, "rewards/frontier_coverage_10": 0.13371351063251496, "rewards/frontier_coverage_15": 0.1263003244996071, "rewards/frontier_coverage_20": 0.10833458304405212, "rewards/frontier_coverage_25": 0.08254657685756683, "rewards/frontier_coverage_5": 0.13412626087665558, "signal/accuracy_reward/centered_abs_mean": 0.094085693359375, "signal/accuracy_reward/group_std_mean": 0.1289389744400978, "signal/accuracy_reward/group_zero_std_frac": 0.609375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0470428466796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0470428466796875, "signal/advantage_abs_mean": 0.05466923713684082, "signal/advantage_pre_scale_abs_mean": 0.05466923713684082, "signal/advantage_pre_scale_std": 0.10098292231559754, "signal/advantage_std": 0.10098292231559754, "signal/brier_reward/centered_abs_mean": 0.13195911645889283, "signal/brier_reward/group_std_mean": 0.17153678834438324, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013195911981165409, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013195911981165409, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029695136472582817, "signal/confidence_uniqueness_reward/group_std_mean": 0.03927401304244995, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029695137403905393, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029695137403905393, "signal/format_reward/centered_abs_mean": 0.0022216796875, "signal/format_reward/group_std_mean": 0.005560987815260887, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00111083984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00111083984375, "signal/frontier_coverage_0/centered_abs_mean": 0.17768071293830873, "signal/frontier_coverage_0/group_std_mean": 0.23065827786922455, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00254083420149982, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00254083420149982, "signal/frontier_coverage_1/centered_abs_mean": 0.17768071293830873, "signal/frontier_coverage_1/group_std_mean": 0.23065827786922455, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00254083420149982, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00254083420149982, "signal/frontier_coverage_10/centered_abs_mean": 0.1767966330051422, "signal/frontier_coverage_10/group_std_mean": 0.22955543994903566, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0025281918235123156, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025281918235123156, "signal/frontier_coverage_15/centered_abs_mean": 0.1646002173423767, "signal/frontier_coverage_15/group_std_mean": 0.2140391707420349, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002353783091530204, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002353783091530204, "signal/frontier_coverage_20/centered_abs_mean": 0.143599534034729, "signal/frontier_coverage_20/group_std_mean": 0.18697430789470673, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0020534733310341837, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020534733310341837, "signal/frontier_coverage_25/centered_abs_mean": 0.10858550369739532, "signal/frontier_coverage_25/group_std_mean": 0.1407358765602112, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015527727315202355, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0015527727315202355, "signal/frontier_coverage_5/centered_abs_mean": 0.17743545770645142, "signal/frontier_coverage_5/group_std_mean": 0.23035745918750763, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025373270735144617, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025373270735144617, "step": 185 }, { "calibration/aurc": 0.2221950929064717, "calibration/batch_distribution_entropy": 0.8981914098495842, "calibration/buffer_distribution_entropy": 0.9540588288714265, "calibration/confidence_entropy": 0.3871919967367749, "calibration/coverage@0%": 0.03520986519607843, "calibration/coverage@1%": 0.03520986519607843, "calibration/coverage@10%": 0.26088082107843136, "calibration/coverage@15%": 0.42122549019607847, "calibration/coverage@20%": 0.5440119485294117, "calibration/coverage@25%": 0.6319975490196079, "calibration/coverage@30%": 0.7090349264705882, "calibration/coverage@5%": 0.16232536764705885, "calibration/ece": 0.10212346384803919, "calibration/mean_confidence": 0.4796287089460784, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00205078125, "completions/max_length": 616.0, "completions/max_terminated_length": 616.0, "completions/mean_length": 212.79658203125, "completions/mean_terminated_length": 213.23521423339844, "completions/min_length": 0.0, "completions/min_terminated_length": 101.2, "epoch": 0.608, "grad_norm": 0.0006865831674076617, "learning_rate": 1e-06, "loss": -0.0015, "num_tokens": 641757321.0, "reward": 0.953406548500061, "reward_std": 0.06920359134674073, "rewards/accuracy_reward": 0.52880859375, "rewards/brier_reward": 0.812217366695404, "rewards/confidence_uniqueness_reward": 0.9366782307624817, "rewards/format_reward": 0.9978515625, "rewards/frontier_coverage_0": 0.17517081648111343, "rewards/frontier_coverage_1": 0.17517081648111343, "rewards/frontier_coverage_10": 0.16990652978420256, "rewards/frontier_coverage_15": 0.14712055921554565, "rewards/frontier_coverage_20": 0.12427032291889191, "rewards/frontier_coverage_25": 0.09521407037973403, "rewards/frontier_coverage_5": 0.17517081648111343, "signal/accuracy_reward/centered_abs_mean": 0.090557861328125, "signal/accuracy_reward/group_std_mean": 0.11723618507385254, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0452789306640625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0452789306640625, "signal/advantage_abs_mean": 0.05036781206727028, "signal/advantage_pre_scale_abs_mean": 0.05036781206727028, "signal/advantage_pre_scale_std": 0.09739291965961457, "signal/advantage_std": 0.09739291965961457, "signal/brier_reward/centered_abs_mean": 0.12750503718852996, "signal/brier_reward/group_std_mean": 0.16529836356639863, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012750503793358802, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012750503793358802, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03464677035808563, "signal/confidence_uniqueness_reward/group_std_mean": 0.04760062992572785, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.003464677091687918, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003464677091687918, "signal/format_reward/centered_abs_mean": 0.00411376953125, "signal/format_reward/group_std_mean": 0.010808163974434137, "signal/format_reward/group_zero_std_frac": 0.94375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002056884765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002056884765625, "signal/frontier_coverage_0/centered_abs_mean": 0.1837489575147629, "signal/frontier_coverage_0/group_std_mean": 0.23425883054733276, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0026276100426912306, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026276100426912306, "signal/frontier_coverage_1/centered_abs_mean": 0.1837489575147629, "signal/frontier_coverage_1/group_std_mean": 0.23425883054733276, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0026276100426912306, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026276100426912306, "signal/frontier_coverage_10/centered_abs_mean": 0.1784739762544632, "signal/frontier_coverage_10/group_std_mean": 0.22762221992015838, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00255217794328928, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00255217794328928, "signal/frontier_coverage_15/centered_abs_mean": 0.1537548005580902, "signal/frontier_coverage_15/group_std_mean": 0.19631823003292084, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002198693575337529, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002198693575337529, "signal/frontier_coverage_20/centered_abs_mean": 0.1277748465538025, "signal/frontier_coverage_20/group_std_mean": 0.16300177872180938, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001827180222608149, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001827180222608149, "signal/frontier_coverage_25/centered_abs_mean": 0.08930874615907669, "signal/frontier_coverage_25/group_std_mean": 0.11316503584384918, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0012771150562912227, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012771150562912227, "signal/frontier_coverage_5/centered_abs_mean": 0.1837489575147629, "signal/frontier_coverage_5/group_std_mean": 0.23425883054733276, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0026276100426912306, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026276100426912306, "step": 190 }, { "calibration/aurc": 0.25358338367873656, "calibration/batch_distribution_entropy": 0.9409049379893626, "calibration/buffer_distribution_entropy": 0.9444694889337532, "calibration/confidence_entropy": 0.4293404041236972, "calibration/coverage@0%": 0.015234375, "calibration/coverage@1%": 0.015234375, "calibration/coverage@10%": 0.1504701259784736, "calibration/coverage@15%": 0.23410591976516634, "calibration/coverage@20%": 0.42790560787671234, "calibration/coverage@25%": 0.525601608365949, "calibration/coverage@30%": 0.6604444410469668, "calibration/coverage@5%": 0.05234375, "calibration/ece": 0.10427488049231773, "calibration/mean_confidence": 0.5089863970315482, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 652.6, "completions/max_terminated_length": 652.6, "completions/mean_length": 216.096484375, "completions/mean_terminated_length": 216.2876770019531, "completions/min_length": 21.2, "completions/min_terminated_length": 102.6, "epoch": 0.624, "grad_norm": 0.0008978003170341253, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 659314053.0, "reward": 0.9518577218055725, "reward_std": 0.07592637240886688, "rewards/accuracy_reward": 0.52685546875, "rewards/brier_reward": 0.8048586487770081, "rewards/confidence_uniqueness_reward": 0.9485518336296082, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.16178978681564332, "rewards/frontier_coverage_1": 0.16178978681564332, "rewards/frontier_coverage_10": 0.14961466044187546, "rewards/frontier_coverage_15": 0.12837036401033403, "rewards/frontier_coverage_20": 0.10305129885673522, "rewards/frontier_coverage_25": 0.08324643075466157, "rewards/frontier_coverage_5": 0.16159312427043915, "signal/accuracy_reward/centered_abs_mean": 0.095526123046875, "signal/accuracy_reward/group_std_mean": 0.12815573960542678, "signal/accuracy_reward/group_zero_std_frac": 0.625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477630615234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0477630615234375, "signal/advantage_abs_mean": 0.05672949850559235, "signal/advantage_pre_scale_abs_mean": 0.05672949850559235, "signal/advantage_pre_scale_std": 0.10506684482097625, "signal/advantage_std": 0.10506684482097625, "signal/brier_reward/centered_abs_mean": 0.13485520780086518, "signal/brier_reward/group_std_mean": 0.17366183698177337, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013485521078109741, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013485521078109741, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025889959558844565, "signal/confidence_uniqueness_reward/group_std_mean": 0.035064182430505755, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0025889959651976824, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025889959651976824, "signal/format_reward/centered_abs_mean": 0.00186767578125, "signal/format_reward/group_std_mean": 0.004851654777303338, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000933837890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000933837890625, "signal/frontier_coverage_0/centered_abs_mean": 0.17606533467769622, "signal/frontier_coverage_0/group_std_mean": 0.22901685237884523, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025177341885864735, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025177341885864735, "signal/frontier_coverage_1/centered_abs_mean": 0.17606533467769622, "signal/frontier_coverage_1/group_std_mean": 0.22901685237884523, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025177341885864735, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025177341885864735, "signal/frontier_coverage_10/centered_abs_mean": 0.15669833421707152, "signal/frontier_coverage_10/group_std_mean": 0.20450213849544524, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0022407862357795238, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022407862357795238, "signal/frontier_coverage_15/centered_abs_mean": 0.1294904828071594, "signal/frontier_coverage_15/group_std_mean": 0.16927442252635955, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001851713890209794, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001851713890209794, "signal/frontier_coverage_20/centered_abs_mean": 0.09612660855054855, "signal/frontier_coverage_20/group_std_mean": 0.1249430313706398, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013746104203164577, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013746104203164577, "signal/frontier_coverage_25/centered_abs_mean": 0.07619047313928604, "signal/frontier_coverage_25/group_std_mean": 0.09707885384559631, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010895237675867975, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010895237675867975, "signal/frontier_coverage_5/centered_abs_mean": 0.1756005883216858, "signal/frontier_coverage_5/group_std_mean": 0.2284373462200165, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025110884103924035, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025110884103924035, "step": 195 }, { "calibration/aurc": 0.26737008830943737, "calibration/batch_distribution_entropy": 0.9344917692121356, "calibration/buffer_distribution_entropy": 0.9439410149296797, "calibration/confidence_entropy": 0.4337533162224349, "calibration/coverage@0%": 0.00703125, "calibration/coverage@1%": 0.00703125, "calibration/coverage@10%": 0.26171875, "calibration/coverage@15%": 0.3359559184675835, "calibration/coverage@20%": 0.4015809184675835, "calibration/coverage@25%": 0.5195865054027504, "calibration/coverage@30%": 0.6243131446463654, "calibration/coverage@5%": 0.160546875, "calibration/ece": 0.16993083121851912, "calibration/mean_confidence": 0.5635277496385536, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001953125, "completions/max_length": 843.4, "completions/max_terminated_length": 843.4, "completions/mean_length": 220.33671875, "completions/mean_terminated_length": 220.7684814453125, "completions/min_length": 20.4, "completions/min_terminated_length": 100.8, "epoch": 0.64, "grad_norm": 0.0007992589962668717, "learning_rate": 1e-06, "loss": -0.001, "num_tokens": 676912989.0, "reward": 0.9679869651794434, "reward_std": 0.06987917795777321, "rewards/accuracy_reward": 0.56767578125, "rewards/brier_reward": 0.7970318555831909, "rewards/confidence_uniqueness_reward": 0.9502007246017456, "rewards/format_reward": 0.998046875, "rewards/frontier_coverage_0": 0.11542568355798721, "rewards/frontier_coverage_1": 0.11542568355798721, "rewards/frontier_coverage_10": 0.1147423341870308, "rewards/frontier_coverage_15": 0.09766240417957306, "rewards/frontier_coverage_20": 0.08443292677402496, "rewards/frontier_coverage_25": 0.08432328850030898, "rewards/frontier_coverage_5": 0.11542568355798721, "signal/accuracy_reward/centered_abs_mean": 0.082891845703125, "signal/accuracy_reward/group_std_mean": 0.112214395403862, "signal/accuracy_reward/group_zero_std_frac": 0.671875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0414459228515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0414459228515625, "signal/advantage_abs_mean": 0.051616641134023665, "signal/advantage_pre_scale_abs_mean": 0.051616641134023665, "signal/advantage_pre_scale_std": 0.10157921314239501, "signal/advantage_std": 0.10157921314239501, "signal/brier_reward/centered_abs_mean": 0.12237899452447891, "signal/brier_reward/group_std_mean": 0.15896496474742888, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012237900123000144, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012237900123000144, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02465735524892807, "signal/confidence_uniqueness_reward/group_std_mean": 0.03368319347500801, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00246573556214571, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00246573556214571, "signal/format_reward/centered_abs_mean": 0.0033203125, "signal/format_reward/group_std_mean": 0.006831592507660389, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00166015625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00166015625, "signal/frontier_coverage_0/centered_abs_mean": 0.147454434633255, "signal/frontier_coverage_0/group_std_mean": 0.19461724758148194, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021085983607918024, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021085983607918024, "signal/frontier_coverage_1/centered_abs_mean": 0.147454434633255, "signal/frontier_coverage_1/group_std_mean": 0.19461724758148194, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021085983607918024, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021085983607918024, "signal/frontier_coverage_10/centered_abs_mean": 0.13889139294624328, "signal/frontier_coverage_10/group_std_mean": 0.18324436545372008, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019861468579620125, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019861468579620125, "signal/frontier_coverage_15/centered_abs_mean": 0.11265147924423217, "signal/frontier_coverage_15/group_std_mean": 0.1481318399310112, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016109161078929901, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016109161078929901, "signal/frontier_coverage_20/centered_abs_mean": 0.08642503172159195, "signal/frontier_coverage_20/group_std_mean": 0.11224976629018783, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012358779087662697, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012358779087662697, "signal/frontier_coverage_25/centered_abs_mean": 0.07372982874512672, "signal/frontier_coverage_25/group_std_mean": 0.09456790834665299, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001054336572997272, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001054336572997272, "signal/frontier_coverage_5/centered_abs_mean": 0.147454434633255, "signal/frontier_coverage_5/group_std_mean": 0.19461724758148194, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021085983607918024, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021085983607918024, "step": 200 }, { "epoch": 0.64, "eval_calibration/aurc": 0.44617585022319983, "eval_calibration/batch_distribution_entropy": 0.8707856047342964, "eval_calibration/buffer_distribution_entropy": 0.9545939481969506, "eval_calibration/confidence_entropy": 0.42791586438434864, "eval_calibration/coverage@0%": 0.03125, "eval_calibration/coverage@1%": 0.03125, "eval_calibration/coverage@10%": 0.03125, "eval_calibration/coverage@15%": 0.03125, "eval_calibration/coverage@20%": 0.1328125, "eval_calibration/coverage@25%": 0.203125, "eval_calibration/coverage@30%": 0.2109375, "eval_calibration/coverage@5%": 0.03125, "eval_calibration/ece": 0.2234921875, "eval_calibration/mean_confidence": 0.5258359375, "eval_completions/clipped_ratio": 0.001953125, "eval_completions/max_length": 470.0, "eval_completions/max_terminated_length": 470.0, "eval_completions/mean_length": 221.40463256835938, "eval_completions/mean_terminated_length": 221.83445739746094, "eval_completions/min_length": 87.5, "eval_completions/min_terminated_length": 118.0, "eval_loss": 0.0, "eval_num_tokens": 676912989.0, "eval_reward": 0.8984390497207642, "eval_reward_std": 0.23546447232365608, "eval_rewards/accuracy_reward": 0.43359375, "eval_rewards/brier_reward": 0.77432681620121, "eval_rewards/confidence_uniqueness_reward": 0.9010600447654724, "eval_rewards/format_reward": 0.998046875, "eval_rewards/frontier_coverage_0": 0.18082591891288757, "eval_rewards/frontier_coverage_1": 0.18082591891288757, "eval_rewards/frontier_coverage_10": 0.1791050136089325, "eval_rewards/frontier_coverage_15": 0.1590343937277794, "eval_rewards/frontier_coverage_20": 0.1054372489452362, "eval_rewards/frontier_coverage_25": 0.06905412301421165, "eval_rewards/frontier_coverage_5": 0.18026751652359962, "eval_runtime": 31.6018, "eval_samples_per_second": 15.822, "eval_signal/accuracy_reward/centered_abs_mean": 0.47314453125, "eval_signal/accuracy_reward/group_std_mean": 0.4938596710562706, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.236572265625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.236572265625, "eval_signal/advantage_abs_mean": 0.21800414845347404, "eval_signal/advantage_pre_scale_abs_mean": 0.21800414845347404, "eval_signal/advantage_pre_scale_std": 0.2330782301723957, "eval_signal/advantage_std": 0.2330782301723957, "eval_signal/brier_reward/centered_abs_mean": 0.2277192696928978, "eval_signal/brier_reward/group_std_mean": 0.27933692932128906, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.022771927528083324, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.022771927528083324, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.043904950842261314, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05730144586414099, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00439049513079226, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00439049513079226, "eval_signal/format_reward/centered_abs_mean": 0.0037841796875, "eval_signal/format_reward/group_std_mean": 0.011048543266952038, "eval_signal/format_reward/group_zero_std_frac": 0.9375, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.00189208984375, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.00189208984375, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3516712710261345, "eval_signal/frontier_coverage_0/group_std_mean": 0.44670548290014267, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005028899176977575, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005028899176977575, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3516712710261345, "eval_signal/frontier_coverage_1/group_std_mean": 0.44670548290014267, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005028899176977575, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005028899176977575, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3491114154458046, "eval_signal/frontier_coverage_10/group_std_mean": 0.4436797574162483, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004992293077521026, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004992293077521026, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.302977554500103, "eval_signal/frontier_coverage_15/group_std_mean": 0.3866383582353592, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004332578741014004, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004332578741014004, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.20721931010484695, "eval_signal/frontier_coverage_20/group_std_mean": 0.2700957730412483, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.002963236125651747, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002963236125651747, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.13527094572782516, "eval_signal/frontier_coverage_25/group_std_mean": 0.17407378554344177, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019343745952937752, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019343745952937752, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.35068874806165695, "eval_signal/frontier_coverage_5/group_std_mean": 0.4455043748021126, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005014849128201604, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005014849128201604, "eval_steps_per_second": 0.127, "step": 200 }, { "calibration/aurc": 0.4264361275920406, "calibration/batch_distribution_entropy": 0.9508852979247158, "calibration/buffer_distribution_entropy": 0.9585796176872498, "calibration/confidence_entropy": 0.46872125411654464, "calibration/coverage@0%": 0.00234375, "calibration/coverage@1%": 0.00234375, "calibration/coverage@10%": 0.0078125, "calibration/coverage@15%": 0.041015625, "calibration/coverage@20%": 0.0910324425146771, "calibration/coverage@25%": 0.13128822162426615, "calibration/coverage@30%": 0.22585004892367905, "calibration/coverage@5%": 0.00234375, "calibration/ece": 0.14194846120468735, "calibration/mean_confidence": 0.5072344795816444, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00146484375, "completions/max_length": 965.2, "completions/max_terminated_length": 965.2, "completions/mean_length": 220.93134765625, "completions/mean_terminated_length": 221.25537109375, "completions/min_length": 0.0, "completions/min_terminated_length": 104.8, "epoch": 0.656, "grad_norm": 0.0007650917395949364, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 694031870.0, "reward": 0.9340495944023133, "reward_std": 0.07882022857666016, "rewards/accuracy_reward": 0.49873046875, "rewards/brier_reward": 0.7780602931976318, "rewards/confidence_uniqueness_reward": 0.9526399970054626, "rewards/format_reward": 0.99853515625, "rewards/frontier_coverage_0": 0.13952269703149794, "rewards/frontier_coverage_1": 0.13952269703149794, "rewards/frontier_coverage_10": 0.13961339592933655, "rewards/frontier_coverage_15": 0.12747088223695754, "rewards/frontier_coverage_20": 0.10037298947572708, "rewards/frontier_coverage_25": 0.07738360986113549, "rewards/frontier_coverage_5": 0.13952269703149794, "signal/accuracy_reward/centered_abs_mean": 0.096051025390625, "signal/accuracy_reward/group_std_mean": 0.12981612980365753, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0480255126953125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0480255126953125, "signal/advantage_abs_mean": 0.058326976001262666, "signal/advantage_pre_scale_abs_mean": 0.058326976001262666, "signal/advantage_pre_scale_std": 0.10792815685272217, "signal/advantage_std": 0.10792815685272217, "signal/brier_reward/centered_abs_mean": 0.13490013182163238, "signal/brier_reward/group_std_mean": 0.17295385301113128, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.013490013219416142, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013490013219416142, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02310769259929657, "signal/confidence_uniqueness_reward/group_std_mean": 0.03357519060373306, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002310769259929657, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002310769259929657, "signal/format_reward/centered_abs_mean": 0.002838134765625, "signal/format_reward/group_std_mean": 0.008286407357081771, "signal/format_reward/group_zero_std_frac": 0.953125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0014190673828125, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0014190673828125, "signal/frontier_coverage_0/centered_abs_mean": 0.16485767662525178, "signal/frontier_coverage_0/group_std_mean": 0.21537945568561553, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002357464749366045, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002357464749366045, "signal/frontier_coverage_1/centered_abs_mean": 0.16485767662525178, "signal/frontier_coverage_1/group_std_mean": 0.21537945568561553, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002357464749366045, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002357464749366045, "signal/frontier_coverage_10/centered_abs_mean": 0.16448655128479003, "signal/frontier_coverage_10/group_std_mean": 0.2148954153060913, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002352157747372985, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002352157747372985, "signal/frontier_coverage_15/centered_abs_mean": 0.14851576387882232, "signal/frontier_coverage_15/group_std_mean": 0.19453147947788238, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0021237753331661223, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021237753331661223, "signal/frontier_coverage_20/centered_abs_mean": 0.11294280290603638, "signal/frontier_coverage_20/group_std_mean": 0.14792169630527496, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016150820534676313, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016150820534676313, "signal/frontier_coverage_25/centered_abs_mean": 0.08520928621292115, "signal/frontier_coverage_25/group_std_mean": 0.11086486130952836, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.001218492747284472, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001218492747284472, "signal/frontier_coverage_5/centered_abs_mean": 0.16485767662525178, "signal/frontier_coverage_5/group_std_mean": 0.21537945568561553, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002357464749366045, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002357464749366045, "step": 205 }, { "calibration/aurc": 0.2837096886454081, "calibration/batch_distribution_entropy": 0.9393985931619993, "calibration/buffer_distribution_entropy": 0.9664153325877605, "calibration/confidence_entropy": 0.4499859017812251, "calibration/coverage@0%": 0.04806522137964775, "calibration/coverage@1%": 0.05744022137964775, "calibration/coverage@10%": 0.17310267857142855, "calibration/coverage@15%": 0.21100629892367903, "calibration/coverage@20%": 0.29230140044031316, "calibration/coverage@25%": 0.34975538160469666, "calibration/coverage@30%": 0.4682347725048923, "calibration/coverage@5%": 0.11333552470645794, "calibration/ece": 0.14122779847886976, "calibration/mean_confidence": 0.5043080658845975, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013671875, "completions/max_length": 940.2, "completions/max_terminated_length": 940.2, "completions/mean_length": 219.98779296875, "completions/mean_terminated_length": 220.29050903320314, "completions/min_length": 20.8, "completions/min_terminated_length": 104.8, "epoch": 0.672, "grad_norm": 0.0007192640914581716, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 711197985.0, "reward": 0.9461759328842163, "reward_std": 0.07189572900533676, "rewards/accuracy_reward": 0.51611328125, "rewards/brier_reward": 0.7978503227233886, "rewards/confidence_uniqueness_reward": 0.9457229971885681, "rewards/format_reward": 0.99853515625, "rewards/frontier_coverage_0": 0.15941035151481628, "rewards/frontier_coverage_1": 0.15941035151481628, "rewards/frontier_coverage_10": 0.15893703401088716, "rewards/frontier_coverage_15": 0.14440302103757857, "rewards/frontier_coverage_20": 0.12741003930568695, "rewards/frontier_coverage_25": 0.10461192578077316, "rewards/frontier_coverage_5": 0.15941035151481628, "signal/accuracy_reward/centered_abs_mean": 0.096649169921875, "signal/accuracy_reward/group_std_mean": 0.13027789890766145, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0483245849609375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0483245849609375, "signal/advantage_abs_mean": 0.05240143835544586, "signal/advantage_pre_scale_abs_mean": 0.05240143835544586, "signal/advantage_pre_scale_std": 0.09864708036184311, "signal/advantage_std": 0.09864708036184311, "signal/brier_reward/centered_abs_mean": 0.1261022225022316, "signal/brier_reward/group_std_mean": 0.1641417384147644, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012610222585499287, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012610222585499287, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026672930270433427, "signal/confidence_uniqueness_reward/group_std_mean": 0.03577820919454098, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002667293045669794, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002667293045669794, "signal/format_reward/centered_abs_mean": 0.002764892578125, "signal/format_reward/group_std_mean": 0.006545652449131012, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0013824462890625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0013824462890625, "signal/frontier_coverage_0/centered_abs_mean": 0.17697471082210542, "signal/frontier_coverage_0/group_std_mean": 0.23011261522769927, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025307383853942155, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025307383853942155, "signal/frontier_coverage_1/centered_abs_mean": 0.17697471082210542, "signal/frontier_coverage_1/group_std_mean": 0.23011261522769927, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025307383853942155, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025307383853942155, "signal/frontier_coverage_10/centered_abs_mean": 0.1716696798801422, "signal/frontier_coverage_10/group_std_mean": 0.2236652761697769, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0024548764806240795, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024548764806240795, "signal/frontier_coverage_15/centered_abs_mean": 0.1511073052883148, "signal/frontier_coverage_15/group_std_mean": 0.1979276180267334, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.002160834474489093, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002160834474489093, "signal/frontier_coverage_20/centered_abs_mean": 0.12761921137571336, "signal/frontier_coverage_20/group_std_mean": 0.16750997304916382, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001824954734183848, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001824954734183848, "signal/frontier_coverage_25/centered_abs_mean": 0.09979205876588822, "signal/frontier_coverage_25/group_std_mean": 0.13040834069252014, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0014270264655351639, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014270264655351639, "signal/frontier_coverage_5/centered_abs_mean": 0.17697471082210542, "signal/frontier_coverage_5/group_std_mean": 0.23011261522769927, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025307383853942155, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025307383853942155, "step": 210 }, { "calibration/aurc": 0.32691903665475613, "calibration/batch_distribution_entropy": 0.9420609702487284, "calibration/buffer_distribution_entropy": 0.9593856095088638, "calibration/confidence_entropy": 0.4599136168799573, "calibration/coverage@0%": 0.010546875, "calibration/coverage@1%": 0.010546875, "calibration/coverage@10%": 0.053125, "calibration/coverage@15%": 0.158203125, "calibration/coverage@20%": 0.364453125, "calibration/coverage@25%": 0.51171875, "calibration/coverage@30%": 0.615234375, "calibration/coverage@5%": 0.01796875, "calibration/ece": 0.13961892604670562, "calibration/mean_confidence": 0.48078719925966695, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 795.2, "completions/max_terminated_length": 795.2, "completions/mean_length": 223.0029296875, "completions/mean_terminated_length": 223.2849548339844, "completions/min_length": 22.0, "completions/min_terminated_length": 101.4, "epoch": 0.688, "grad_norm": 0.000916535675060004, "learning_rate": 1e-06, "loss": -0.0012, "num_tokens": 728435455.0, "reward": 0.9521884322166443, "reward_std": 0.07523634880781174, "rewards/accuracy_reward": 0.53291015625, "rewards/brier_reward": 0.7939005017280578, "rewards/confidence_uniqueness_reward": 0.9498188853263855, "rewards/format_reward": 0.99873046875, "rewards/frontier_coverage_0": 0.13943086341023445, "rewards/frontier_coverage_1": 0.13943086341023445, "rewards/frontier_coverage_10": 0.13533695340156554, "rewards/frontier_coverage_15": 0.11558721587061882, "rewards/frontier_coverage_20": 0.09408902376890182, "rewards/frontier_coverage_25": 0.07558858171105384, "rewards/frontier_coverage_5": 0.13943086341023445, "signal/accuracy_reward/centered_abs_mean": 0.106097412109375, "signal/accuracy_reward/group_std_mean": 0.13813600242137908, "signal/accuracy_reward/group_zero_std_frac": 0.6125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0530487060546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0530487060546875, "signal/advantage_abs_mean": 0.05597299709916115, "signal/advantage_pre_scale_abs_mean": 0.05597299709916115, "signal/advantage_pre_scale_std": 0.10407697558403015, "signal/advantage_std": 0.10407697558403015, "signal/brier_reward/centered_abs_mean": 0.12551425993442536, "signal/brier_reward/group_std_mean": 0.1616061270236969, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012551426328718663, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012551426328718663, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023624447733163835, "signal/confidence_uniqueness_reward/group_std_mean": 0.032869836688041686, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023624447640031576, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023624447640031576, "signal/format_reward/centered_abs_mean": 0.002459716796875, "signal/format_reward/group_std_mean": 0.007181553123518825, "signal/format_reward/group_zero_std_frac": 0.959375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012298583984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012298583984375, "signal/frontier_coverage_0/centered_abs_mean": 0.17795217037200928, "signal/frontier_coverage_0/group_std_mean": 0.2277902901172638, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0025447160936892033, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025447160936892033, "signal/frontier_coverage_1/centered_abs_mean": 0.17795217037200928, "signal/frontier_coverage_1/group_std_mean": 0.2277902901172638, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0025447160936892033, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025447160936892033, "signal/frontier_coverage_10/centered_abs_mean": 0.16942830383777618, "signal/frontier_coverage_10/group_std_mean": 0.21730588376522064, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002422824781388044, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002422824781388044, "signal/frontier_coverage_15/centered_abs_mean": 0.14047065675258635, "signal/frontier_coverage_15/group_std_mean": 0.1811319559812546, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00200873042922467, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00200873042922467, "signal/frontier_coverage_20/centered_abs_mean": 0.1157080888748169, "signal/frontier_coverage_20/group_std_mean": 0.14973447024822234, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0016546256374567747, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016546256374567747, "signal/frontier_coverage_25/centered_abs_mean": 0.09142259359359742, "signal/frontier_coverage_25/group_std_mean": 0.11834468692541122, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013073430862277746, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013073430862277746, "signal/frontier_coverage_5/centered_abs_mean": 0.17795217037200928, "signal/frontier_coverage_5/group_std_mean": 0.2277902901172638, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0025447160936892033, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025447160936892033, "step": 215 }, { "calibration/aurc": 0.2512719069451267, "calibration/batch_distribution_entropy": 0.9361608966618207, "calibration/buffer_distribution_entropy": 0.9529173332818509, "calibration/confidence_entropy": 0.4415924822970334, "calibration/coverage@0%": 0.01840379302405894, "calibration/coverage@1%": 0.01840379302405894, "calibration/coverage@10%": 0.06894454057307856, "calibration/coverage@15%": 0.18214830709681135, "calibration/coverage@20%": 0.34576391986589156, "calibration/coverage@25%": 0.5378147123575457, "calibration/coverage@30%": 0.7360634874669045, "calibration/coverage@5%": 0.04623161165150992, "calibration/ece": 0.11434603267232472, "calibration/mean_confidence": 0.4953933710343634, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 581.8, "completions/max_terminated_length": 581.8, "completions/mean_length": 222.84072265625, "completions/mean_terminated_length": 223.0822326660156, "completions/min_length": 21.0, "completions/min_terminated_length": 101.8, "epoch": 0.704, "grad_norm": 0.0006802778807468712, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 745583488.0, "reward": 0.9577384948730469, "reward_std": 0.06939697116613389, "rewards/accuracy_reward": 0.54189453125, "rewards/brier_reward": 0.8031554341316223, "rewards/confidence_uniqueness_reward": 0.9526785373687744, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.1384565994143486, "rewards/frontier_coverage_1": 0.1384565994143486, "rewards/frontier_coverage_10": 0.12583428174257277, "rewards/frontier_coverage_15": 0.10680036693811416, "rewards/frontier_coverage_20": 0.09270759522914887, "rewards/frontier_coverage_25": 0.08056866303086281, "rewards/frontier_coverage_5": 0.1385010600090027, "signal/accuracy_reward/centered_abs_mean": 0.086895751953125, "signal/accuracy_reward/group_std_mean": 0.1153362974524498, "signal/accuracy_reward/group_zero_std_frac": 0.659375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0434478759765625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0434478759765625, "signal/advantage_abs_mean": 0.05145658850669861, "signal/advantage_pre_scale_abs_mean": 0.05145658850669861, "signal/advantage_pre_scale_std": 0.09849272668361664, "signal/advantage_std": 0.09849272668361664, "signal/brier_reward/centered_abs_mean": 0.12422804683446884, "signal/brier_reward/group_std_mean": 0.1597428023815155, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012422805279493332, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012422805279493332, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021860988438129426, "signal/confidence_uniqueness_reward/group_std_mean": 0.03040802404284477, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021860988344997168, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021860988344997168, "signal/format_reward/centered_abs_mean": 0.002069091796875, "signal/format_reward/group_std_mean": 0.005740390345454216, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375, "signal/frontier_coverage_0/centered_abs_mean": 0.16862587928771972, "signal/frontier_coverage_0/group_std_mean": 0.21537420451641082, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002411350002512336, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002411350002512336, "signal/frontier_coverage_1/centered_abs_mean": 0.16862587928771972, "signal/frontier_coverage_1/group_std_mean": 0.21537420451641082, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002411350002512336, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002411350002512336, "signal/frontier_coverage_10/centered_abs_mean": 0.1515301063656807, "signal/frontier_coverage_10/group_std_mean": 0.19380762279033661, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021668805042281748, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021668805042281748, "signal/frontier_coverage_15/centered_abs_mean": 0.12467771619558335, "signal/frontier_coverage_15/group_std_mean": 0.15953091979026796, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0017828913405537605, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017828913405537605, "signal/frontier_coverage_20/centered_abs_mean": 0.09930351227521897, "signal/frontier_coverage_20/group_std_mean": 0.12707742750644685, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001420040219090879, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001420040219090879, "signal/frontier_coverage_25/centered_abs_mean": 0.07597752884030343, "signal/frontier_coverage_25/group_std_mean": 0.09732898026704788, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010864786920137704, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010864786920137704, "signal/frontier_coverage_5/centered_abs_mean": 0.16813057661056519, "signal/frontier_coverage_5/group_std_mean": 0.21476930379867554, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0024042673408985137, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024042673408985137, "step": 220 }, { "calibration/aurc": 0.22253628887566307, "calibration/batch_distribution_entropy": 0.9450462719837344, "calibration/buffer_distribution_entropy": 0.9572577058713276, "calibration/confidence_entropy": 0.4206489216290737, "calibration/coverage@0%": 0.06993792808219178, "calibration/coverage@1%": 0.08751605308219178, "calibration/coverage@10%": 0.26743364726027397, "calibration/coverage@15%": 0.4586464958414872, "calibration/coverage@20%": 0.5466005687377691, "calibration/coverage@25%": 0.6150004586594913, "calibration/coverage@30%": 0.6967007093933464, "calibration/coverage@5%": 0.16965967465753423, "calibration/ece": 0.14508923354516995, "calibration/mean_confidence": 0.5053411371799115, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00146484375, "completions/max_length": 632.6, "completions/max_terminated_length": 632.6, "completions/mean_length": 222.9833984375, "completions/mean_terminated_length": 223.31114196777344, "completions/min_length": 19.8, "completions/min_terminated_length": 104.2, "epoch": 0.72, "grad_norm": 0.0008460658136755228, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 762876694.0, "reward": 0.9683452606201172, "reward_std": 0.0708312913775444, "rewards/accuracy_reward": 0.56240234375, "rewards/brier_reward": 0.8110374331474304, "rewards/confidence_uniqueness_reward": 0.9519586086273193, "rewards/format_reward": 0.9984375, "rewards/frontier_coverage_0": 0.1352271929383278, "rewards/frontier_coverage_1": 0.1352271929383278, "rewards/frontier_coverage_10": 0.1254192978143692, "rewards/frontier_coverage_15": 0.111299267411232, "rewards/frontier_coverage_20": 0.08802737891674042, "rewards/frontier_coverage_25": 0.08286877870559692, "rewards/frontier_coverage_5": 0.134917189180851, "signal/accuracy_reward/centered_abs_mean": 0.088519287109375, "signal/accuracy_reward/group_std_mean": 0.12481878697872162, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442596435546875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0442596435546875, "signal/advantage_abs_mean": 0.04948367401957512, "signal/advantage_pre_scale_abs_mean": 0.04948367401957512, "signal/advantage_pre_scale_std": 0.09783572107553482, "signal/advantage_std": 0.09783572107553482, "signal/brier_reward/centered_abs_mean": 0.11841019541025162, "signal/brier_reward/group_std_mean": 0.154974827170372, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011841019801795482, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011841019801795482, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02254415713250637, "signal/confidence_uniqueness_reward/group_std_mean": 0.032048237323760984, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022544157691299915, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022544157691299915, "signal/format_reward/centered_abs_mean": 0.0030029296875, "signal/format_reward/group_std_mean": 0.008166217897087335, "signal/format_reward/group_zero_std_frac": 0.95625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00150146484375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00150146484375, "signal/frontier_coverage_0/centered_abs_mean": 0.1640920639038086, "signal/frontier_coverage_0/group_std_mean": 0.21496865749359131, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0023465165868401527, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023465165868401527, "signal/frontier_coverage_1/centered_abs_mean": 0.1640920639038086, "signal/frontier_coverage_1/group_std_mean": 0.21496865749359131, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0023465165868401527, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023465165868401527, "signal/frontier_coverage_10/centered_abs_mean": 0.14849277436733246, "signal/frontier_coverage_10/group_std_mean": 0.19446865618228912, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002123446692712605, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002123446692712605, "signal/frontier_coverage_15/centered_abs_mean": 0.1182610735297203, "signal/frontier_coverage_15/group_std_mean": 0.1555154412984848, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016911332961171866, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016911332961171866, "signal/frontier_coverage_20/centered_abs_mean": 0.08573998808860779, "signal/frontier_coverage_20/group_std_mean": 0.11260216832160949, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001226081815548241, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001226081815548241, "signal/frontier_coverage_25/centered_abs_mean": 0.06338882744312287, "signal/frontier_coverage_25/group_std_mean": 0.08182145059108734, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009064602083526552, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009064602083526552, "signal/frontier_coverage_5/centered_abs_mean": 0.1630953937768936, "signal/frontier_coverage_5/group_std_mean": 0.2136875331401825, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002332264045253396, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002332264045253396, "step": 225 }, { "calibration/aurc": 0.24824451300334033, "calibration/batch_distribution_entropy": 0.9365923239236968, "calibration/buffer_distribution_entropy": 0.963191063037019, "calibration/confidence_entropy": 0.4336782707003815, "calibration/coverage@0%": 0.022297794117647058, "calibration/coverage@1%": 0.022297794117647058, "calibration/coverage@10%": 0.08643829920666897, "calibration/coverage@15%": 0.30066691188461686, "calibration/coverage@20%": 0.46180249282932345, "calibration/coverage@25%": 0.5868881812142664, "calibration/coverage@30%": 0.672549124529949, "calibration/coverage@5%": 0.03950287606480948, "calibration/ece": 0.13127255862630277, "calibration/mean_confidence": 0.5733970891471901, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 864.6, "completions/max_terminated_length": 864.6, "completions/mean_length": 223.4474609375, "completions/mean_terminated_length": 223.73309936523438, "completions/min_length": 19.6, "completions/min_terminated_length": 95.0, "epoch": 0.736, "grad_norm": 0.0006176315364427865, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 780104380.0, "reward": 0.9651668429374695, "reward_std": 0.0696649581193924, "rewards/accuracy_reward": 0.56181640625, "rewards/brier_reward": 0.7952903747558594, "rewards/confidence_uniqueness_reward": 0.9530894517898559, "rewards/format_reward": 0.99873046875, "rewards/frontier_coverage_0": 0.1159910187125206, "rewards/frontier_coverage_1": 0.1159910187125206, "rewards/frontier_coverage_10": 0.10471980273723602, "rewards/frontier_coverage_15": 0.08863085955381393, "rewards/frontier_coverage_20": 0.0780431255698204, "rewards/frontier_coverage_25": 0.08450771719217301, "rewards/frontier_coverage_5": 0.11529324352741241, "signal/accuracy_reward/centered_abs_mean": 0.085174560546875, "signal/accuracy_reward/group_std_mean": 0.11592481285333633, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0425872802734375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0425872802734375, "signal/advantage_abs_mean": 0.05093270391225815, "signal/advantage_pre_scale_abs_mean": 0.05093270391225815, "signal/advantage_pre_scale_std": 0.09835466593503953, "signal/advantage_std": 0.09835466593503953, "signal/brier_reward/centered_abs_mean": 0.12378608733415604, "signal/brier_reward/group_std_mean": 0.16011227071285247, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01237860918045044, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01237860918045044, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022135768830776215, "signal/confidence_uniqueness_reward/group_std_mean": 0.03068559318780899, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002213576971553266, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002213576971553266, "signal/format_reward/centered_abs_mean": 0.002423095703125, "signal/format_reward/group_std_mean": 0.006172627722844481, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012115478515625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012115478515625, "signal/frontier_coverage_0/centered_abs_mean": 0.15735355913639068, "signal/frontier_coverage_0/group_std_mean": 0.20486867129802705, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002250155946239829, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002250155946239829, "signal/frontier_coverage_1/centered_abs_mean": 0.15735355913639068, "signal/frontier_coverage_1/group_std_mean": 0.20486867129802705, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002250155946239829, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002250155946239829, "signal/frontier_coverage_10/centered_abs_mean": 0.13432896435260772, "signal/frontier_coverage_10/group_std_mean": 0.1756158649921417, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019209041725844144, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019209041725844144, "signal/frontier_coverage_15/centered_abs_mean": 0.10221495479345322, "signal/frontier_coverage_15/group_std_mean": 0.13430811911821366, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014616738073527813, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014616738073527813, "signal/frontier_coverage_20/centered_abs_mean": 0.0775189757347107, "signal/frontier_coverage_20/group_std_mean": 0.10161522030830383, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011085212929174304, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011085212929174304, "signal/frontier_coverage_25/centered_abs_mean": 0.069071663916111, "signal/frontier_coverage_25/group_std_mean": 0.0897357627749443, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009877247619442641, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009877247619442641, "signal/frontier_coverage_5/centered_abs_mean": 0.1556430786848068, "signal/frontier_coverage_5/group_std_mean": 0.2026938110589981, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002225696016103029, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002225696016103029, "step": 230 }, { "calibration/aurc": 0.26965961509465136, "calibration/batch_distribution_entropy": 0.931343074157844, "calibration/buffer_distribution_entropy": 0.9534197325324987, "calibration/confidence_entropy": 0.42210382299186167, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.128515625, "calibration/coverage@15%": 0.20675551470588233, "calibration/coverage@20%": 0.4109256557931392, "calibration/coverage@25%": 0.5564118678293235, "calibration/coverage@30%": 0.6553425556866582, "calibration/coverage@5%": 0.06171875, "calibration/ece": 0.10822966591979921, "calibration/mean_confidence": 0.4941520088048591, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 893.4, "completions/max_terminated_length": 893.4, "completions/mean_length": 220.48369140625, "completions/mean_terminated_length": 220.721337890625, "completions/min_length": 20.4, "completions/min_terminated_length": 102.2, "epoch": 0.752, "grad_norm": 0.000702829216606915, "learning_rate": 1e-06, "loss": -0.0009, "num_tokens": 797589333.0, "reward": 0.966651451587677, "reward_std": 0.06932897940278053, "rewards/accuracy_reward": 0.56298828125, "rewards/brier_reward": 0.7999367952346802, "rewards/confidence_uniqueness_reward": 0.9526612877845764, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.1230026513338089, "rewards/frontier_coverage_1": 0.1230026513338089, "rewards/frontier_coverage_10": 0.1173777550458908, "rewards/frontier_coverage_15": 0.09979364722967148, "rewards/frontier_coverage_20": 0.07793587669730187, "rewards/frontier_coverage_25": 0.06709063202142715, "rewards/frontier_coverage_5": 0.12149005383253098, "signal/accuracy_reward/centered_abs_mean": 0.081842041015625, "signal/accuracy_reward/group_std_mean": 0.11334883570671081, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0409210205078125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0409210205078125, "signal/advantage_abs_mean": 0.049368849396705626, "signal/advantage_pre_scale_abs_mean": 0.049368849396705626, "signal/advantage_pre_scale_std": 0.0992576465010643, "signal/advantage_std": 0.0992576465010643, "signal/brier_reward/centered_abs_mean": 0.1113414391875267, "signal/brier_reward/group_std_mean": 0.14791098535060881, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01113414391875267, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01113414391875267, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02176951803267002, "signal/confidence_uniqueness_reward/group_std_mean": 0.030010972917079926, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021769518963992594, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021769518963992594, "signal/format_reward/centered_abs_mean": 0.002069091796875, "signal/format_reward/group_std_mean": 0.005740390298888088, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375, "signal/frontier_coverage_0/centered_abs_mean": 0.14007504731416703, "signal/frontier_coverage_0/group_std_mean": 0.1843056410551071, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002003073110245168, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002003073110245168, "signal/frontier_coverage_1/centered_abs_mean": 0.14007504731416703, "signal/frontier_coverage_1/group_std_mean": 0.1843056410551071, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002003073110245168, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002003073110245168, "signal/frontier_coverage_10/centered_abs_mean": 0.12636134028434753, "signal/frontier_coverage_10/group_std_mean": 0.16672152578830718, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018069671699777246, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018069671699777246, "signal/frontier_coverage_15/centered_abs_mean": 0.10274002999067307, "signal/frontier_coverage_15/group_std_mean": 0.13605080544948578, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0014691824093461038, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014691824093461038, "signal/frontier_coverage_20/centered_abs_mean": 0.07285871803760528, "signal/frontier_coverage_20/group_std_mean": 0.09634722769260406, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010418796446174383, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010418796446174383, "signal/frontier_coverage_25/centered_abs_mean": 0.05874115601181984, "signal/frontier_coverage_25/group_std_mean": 0.07601820230484009, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008399985264986754, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008399985264986754, "signal/frontier_coverage_5/centered_abs_mean": 0.13739684820175171, "signal/frontier_coverage_5/group_std_mean": 0.18090024590492249, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.001964774914085865, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001964774914085865, "step": 235 }, { "calibration/aurc": 0.2754913190231371, "calibration/batch_distribution_entropy": 0.9653659337495298, "calibration/buffer_distribution_entropy": 0.9563910168815924, "calibration/confidence_entropy": 0.46569203051240315, "calibration/coverage@0%": 0.05728876142194554, "calibration/coverage@1%": 0.05728876142194554, "calibration/coverage@10%": 0.22487203881727966, "calibration/coverage@15%": 0.29580118904764474, "calibration/coverage@20%": 0.3658782739379324, "calibration/coverage@25%": 0.4399407606957949, "calibration/coverage@30%": 0.5046889265393624, "calibration/coverage@5%": 0.14967154683575512, "calibration/ece": 0.1645633654886728, "calibration/mean_confidence": 0.517807627513297, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 835.2, "completions/max_terminated_length": 835.2, "completions/mean_length": 225.77333984375, "completions/mean_terminated_length": 226.01569519042968, "completions/min_length": 0.0, "completions/min_terminated_length": 107.8, "epoch": 0.768, "grad_norm": 0.0008803294622339308, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 814833956.0, "reward": 0.9462002873420715, "reward_std": 0.07274475544691086, "rewards/accuracy_reward": 0.51416015625, "rewards/brier_reward": 0.8063553452491761, "rewards/confidence_uniqueness_reward": 0.9532873511314393, "rewards/format_reward": 0.99892578125, "rewards/frontier_coverage_0": 0.1558481901884079, "rewards/frontier_coverage_1": 0.1558481901884079, "rewards/frontier_coverage_10": 0.1498277723789215, "rewards/frontier_coverage_15": 0.1354019284248352, "rewards/frontier_coverage_20": 0.11420131176710129, "rewards/frontier_coverage_25": 0.09058187007904053, "rewards/frontier_coverage_5": 0.1558481901884079, "signal/accuracy_reward/centered_abs_mean": 0.088494873046875, "signal/accuracy_reward/group_std_mean": 0.11916659921407699, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0442474365234375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0442474365234375, "signal/advantage_abs_mean": 0.05352484881877899, "signal/advantage_pre_scale_abs_mean": 0.05352484881877899, "signal/advantage_pre_scale_std": 0.1029602348804474, "signal/advantage_std": 0.1029602348804474, "signal/brier_reward/centered_abs_mean": 0.11680269986391068, "signal/brier_reward/group_std_mean": 0.15063838958740233, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011680270358920098, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011680270358920098, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021855100244283675, "signal/confidence_uniqueness_reward/group_std_mean": 0.029894111678004265, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021855100989341737, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021855100989341737, "signal/format_reward/centered_abs_mean": 0.002069091796875, "signal/format_reward/group_std_mean": 0.005740390345454216, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0010345458984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010345458984375, "signal/frontier_coverage_0/centered_abs_mean": 0.1492400586605072, "signal/frontier_coverage_0/group_std_mean": 0.1952953338623047, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002134132944047451, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002134132944047451, "signal/frontier_coverage_1/centered_abs_mean": 0.1492400586605072, "signal/frontier_coverage_1/group_std_mean": 0.1952953338623047, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002134132944047451, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002134132944047451, "signal/frontier_coverage_10/centered_abs_mean": 0.14196249544620515, "signal/frontier_coverage_10/group_std_mean": 0.18581181466579438, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002030063676647842, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002030063676647842, "signal/frontier_coverage_15/centered_abs_mean": 0.11799918264150619, "signal/frontier_coverage_15/group_std_mean": 0.15506875813007354, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016873883083462714, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016873883083462714, "signal/frontier_coverage_20/centered_abs_mean": 0.09261109083890914, "signal/frontier_coverage_20/group_std_mean": 0.12177760004997254, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013243386289104818, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013243386289104818, "signal/frontier_coverage_25/centered_abs_mean": 0.06712948903441429, "signal/frontier_coverage_25/group_std_mean": 0.0871183454990387, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009599516983143985, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009599516983143985, "signal/frontier_coverage_5/centered_abs_mean": 0.1492400586605072, "signal/frontier_coverage_5/group_std_mean": 0.1952953338623047, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002134132944047451, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002134132944047451, "step": 240 }, { "calibration/aurc": 0.34061015213100465, "calibration/batch_distribution_entropy": 0.9248090393225793, "calibration/buffer_distribution_entropy": 0.966630339904, "calibration/confidence_entropy": 0.41748047920813597, "calibration/coverage@0%": 0.009379592590460843, "calibration/coverage@1%": 0.009379592590460843, "calibration/coverage@10%": 0.15122242347281378, "calibration/coverage@15%": 0.2530637224924216, "calibration/coverage@20%": 0.31062806072771576, "calibration/coverage@25%": 0.3466390901394804, "calibration/coverage@30%": 0.37598804847281375, "calibration/coverage@5%": 0.09453584259046084, "calibration/ece": 0.15827124658513475, "calibration/mean_confidence": 0.5404969754414958, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 775.4, "completions/max_terminated_length": 775.4, "completions/mean_length": 225.9041015625, "completions/mean_terminated_length": 226.12729187011718, "completions/min_length": 20.0, "completions/min_terminated_length": 106.8, "epoch": 0.784, "grad_norm": 0.0007201886037364602, "learning_rate": 1e-06, "loss": -0.0007, "num_tokens": 832321582.0, "reward": 0.9589765906333924, "reward_std": 0.07071957588195801, "rewards/accuracy_reward": 0.5533203125, "rewards/brier_reward": 0.7801745533943176, "rewards/confidence_uniqueness_reward": 0.9526117444038391, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.10734488293528557, "rewards/frontier_coverage_1": 0.10734488293528557, "rewards/frontier_coverage_10": 0.10519935935735703, "rewards/frontier_coverage_15": 0.09717852100729943, "rewards/frontier_coverage_20": 0.07842598631978034, "rewards/frontier_coverage_25": 0.06332094371318817, "rewards/frontier_coverage_5": 0.10734488293528557, "signal/accuracy_reward/centered_abs_mean": 0.0907470703125, "signal/accuracy_reward/group_std_mean": 0.1225023627281189, "signal/accuracy_reward/group_zero_std_frac": 0.6375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.04537353515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04537353515625, "signal/advantage_abs_mean": 0.052248618006706236, "signal/advantage_pre_scale_abs_mean": 0.052248618006706236, "signal/advantage_pre_scale_std": 0.09925459623336792, "signal/advantage_std": 0.09925459623336792, "signal/brier_reward/centered_abs_mean": 0.12520308941602706, "signal/brier_reward/group_std_mean": 0.16114262938499452, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01252030897885561, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01252030897885561, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022027900069952012, "signal/confidence_uniqueness_reward/group_std_mean": 0.030153784900903702, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022027899976819754, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022027899976819754, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_std_mean": 0.005187963135540485, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_coverage_0/centered_abs_mean": 0.15730546414852142, "signal/frontier_coverage_0/group_std_mean": 0.20497536659240723, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0022494681645184754, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022494681645184754, "signal/frontier_coverage_1/centered_abs_mean": 0.15730546414852142, "signal/frontier_coverage_1/group_std_mean": 0.20497536659240723, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0022494681645184754, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022494681645184754, "signal/frontier_coverage_10/centered_abs_mean": 0.15118659734725953, "signal/frontier_coverage_10/group_std_mean": 0.19713300466537476, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002161968289874494, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002161968289874494, "signal/frontier_coverage_15/centered_abs_mean": 0.14073525965213776, "signal/frontier_coverage_15/group_std_mean": 0.18378305733203887, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0020125140668824314, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020125140668824314, "signal/frontier_coverage_20/centered_abs_mean": 0.100088232755661, "signal/frontier_coverage_20/group_std_mean": 0.13115270733833312, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001431261678226292, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001431261678226292, "signal/frontier_coverage_25/centered_abs_mean": 0.07233644723892212, "signal/frontier_coverage_25/group_std_mean": 0.09395631700754166, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010344112175516783, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010344112175516783, "signal/frontier_coverage_5/centered_abs_mean": 0.15730546414852142, "signal/frontier_coverage_5/group_std_mean": 0.20497536659240723, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0022494681645184754, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022494681645184754, "step": 245 }, { "calibration/aurc": 0.188944295291611, "calibration/batch_distribution_entropy": 0.9274885575581143, "calibration/buffer_distribution_entropy": 0.953439624363039, "calibration/confidence_entropy": 0.4285214943213724, "calibration/coverage@0%": 0.07191398605675146, "calibration/coverage@1%": 0.07191398605675146, "calibration/coverage@10%": 0.3349567331213307, "calibration/coverage@15%": 0.5131688784246575, "calibration/coverage@20%": 0.6077322345890411, "calibration/coverage@25%": 0.6776793358610568, "calibration/coverage@30%": 0.7593390716731898, "calibration/coverage@5%": 0.15942621697651665, "calibration/ece": 0.11407683786445302, "calibration/mean_confidence": 0.5089776058348807, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000390625, "completions/max_length": 795.8, "completions/max_terminated_length": 795.8, "completions/mean_length": 225.09853515625, "completions/mean_terminated_length": 225.18656005859376, "completions/min_length": 24.2, "completions/min_terminated_length": 113.8, "epoch": 0.8, "grad_norm": 0.0007698666886426508, "learning_rate": 1e-06, "loss": 0.0001, "num_tokens": 849637151.0, "reward": 0.9806387066841126, "reward_std": 0.06646973639726639, "rewards/accuracy_reward": 0.589453125, "rewards/brier_reward": 0.8106509804725647, "rewards/confidence_uniqueness_reward": 0.9531700253486634, "rewards/format_reward": 0.999609375, "rewards/frontier_coverage_0": 0.11020932197570801, "rewards/frontier_coverage_1": 0.11020932197570801, "rewards/frontier_coverage_10": 0.10672755688428878, "rewards/frontier_coverage_15": 0.09707566052675247, "rewards/frontier_coverage_20": 0.07785675972700119, "rewards/frontier_coverage_25": 0.06814835816621781, "rewards/frontier_coverage_5": 0.10986628532409667, "signal/accuracy_reward/centered_abs_mean": 0.08389892578125, "signal/accuracy_reward/group_std_mean": 0.1153394877910614, "signal/accuracy_reward/group_zero_std_frac": 0.65625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.041949462890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.041949462890625, "signal/advantage_abs_mean": 0.04819626733660698, "signal/advantage_pre_scale_abs_mean": 0.04819626733660698, "signal/advantage_pre_scale_std": 0.09706049710512162, "signal/advantage_std": 0.09706049710512162, "signal/brier_reward/centered_abs_mean": 0.10596445500850678, "signal/brier_reward/group_std_mean": 0.13802684843540192, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010596446134150029, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010596446134150029, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021166018024086953, "signal/confidence_uniqueness_reward/group_std_mean": 0.027424711734056473, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021166018676012756, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021166018676012756, "signal/format_reward/centered_abs_mean": 0.0007568359375, "signal/format_reward/group_std_mean": 0.0022097086533904076, "signal/format_reward/group_zero_std_frac": 0.9875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00037841796875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00037841796875, "signal/frontier_coverage_0/centered_abs_mean": 0.1317434698343277, "signal/frontier_coverage_0/group_std_mean": 0.17327735126018523, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.001883931620977819, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001883931620977819, "signal/frontier_coverage_1/centered_abs_mean": 0.1317434698343277, "signal/frontier_coverage_1/group_std_mean": 0.17327735126018523, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.001883931620977819, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001883931620977819, "signal/frontier_coverage_10/centered_abs_mean": 0.12727195620536805, "signal/frontier_coverage_10/group_std_mean": 0.16740552484989166, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018199889454990625, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018199889454990625, "signal/frontier_coverage_15/centered_abs_mean": 0.11404764950275421, "signal/frontier_coverage_15/group_std_mean": 0.15062055885791778, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016308813821524382, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016308813821524382, "signal/frontier_coverage_20/centered_abs_mean": 0.08656607568264008, "signal/frontier_coverage_20/group_std_mean": 0.11483617275953292, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0012378948042169214, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012378948042169214, "signal/frontier_coverage_25/centered_abs_mean": 0.068538336455822, "signal/frontier_coverage_25/group_std_mean": 0.0897128164768219, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009800982195883989, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009800982195883989, "signal/frontier_coverage_5/centered_abs_mean": 0.1310385376214981, "signal/frontier_coverage_5/group_std_mean": 0.17232318818569184, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018738510785624384, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018738510785624384, "step": 250 }, { "epoch": 0.8, "eval_calibration/aurc": 0.4476727729252362, "eval_calibration/batch_distribution_entropy": 0.8957618944466985, "eval_calibration/buffer_distribution_entropy": 0.9420687234711133, "eval_calibration/confidence_entropy": 0.43445973687780964, "eval_calibration/coverage@0%": 0.109375, "eval_calibration/coverage@1%": 0.109375, "eval_calibration/coverage@10%": 0.109375, "eval_calibration/coverage@15%": 0.125, "eval_calibration/coverage@20%": 0.1640625, "eval_calibration/coverage@25%": 0.265625, "eval_calibration/coverage@30%": 0.28125, "eval_calibration/coverage@5%": 0.109375, "eval_calibration/ece": 0.2315625, "eval_calibration/mean_confidence": 0.50203125, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 425.75, "eval_completions/max_terminated_length": 425.75, "eval_completions/mean_length": 232.50202178955078, "eval_completions/mean_terminated_length": 232.50202178955078, "eval_completions/min_length": 130.0, "eval_completions/min_terminated_length": 130.0, "eval_loss": 0.0, "eval_num_tokens": 849637151.0, "eval_reward": 0.8972103148698807, "eval_reward_std": 0.2323836162686348, "eval_rewards/accuracy_reward": 0.419921875, "eval_rewards/brier_reward": 0.7953435629606247, "eval_rewards/confidence_uniqueness_reward": 0.896728515625, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.21285292878746986, "eval_rewards/frontier_coverage_1": 0.21285292878746986, "eval_rewards/frontier_coverage_10": 0.20865214988589287, "eval_rewards/frontier_coverage_15": 0.19585801288485527, "eval_rewards/frontier_coverage_20": 0.14357871003448963, "eval_rewards/frontier_coverage_25": 0.07651386596262455, "eval_rewards/frontier_coverage_5": 0.21138105168938637, "eval_runtime": 21.2115, "eval_samples_per_second": 23.572, "eval_signal/accuracy_reward/centered_abs_mean": 0.4720458984375, "eval_signal/accuracy_reward/group_std_mean": 0.49325957894325256, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23602294921875, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23602294921875, "eval_signal/advantage_abs_mean": 0.21680431440472603, "eval_signal/advantage_pre_scale_abs_mean": 0.21680431440472603, "eval_signal/advantage_pre_scale_std": 0.22975903004407883, "eval_signal/advantage_std": 0.22975903004407883, "eval_signal/brier_reward/centered_abs_mean": 0.2153194323182106, "eval_signal/brier_reward/group_std_mean": 0.2603638060390949, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.021531942766159773, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.021531942766159773, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0452117919921875, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.053748167119920254, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0045211793622002006, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045211793622002006, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.36268793791532516, "eval_signal/frontier_coverage_0/group_std_mean": 0.4430076330900192, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005186437512747943, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005186437512747943, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.36268793791532516, "eval_signal/frontier_coverage_1/group_std_mean": 0.4430076330900192, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005186437512747943, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005186437512747943, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.3564353659749031, "eval_signal/frontier_coverage_10/group_std_mean": 0.4355946108698845, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0050970257725566626, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0050970257725566626, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.3375644385814667, "eval_signal/frontier_coverage_15/group_std_mean": 0.41321366280317307, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0048271710984408855, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0048271710984408855, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.24088909849524498, "eval_signal/frontier_coverage_20/group_std_mean": 0.2981100380420685, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0034447142388671637, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0034447142388671637, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.11748435348272324, "eval_signal/frontier_coverage_25/group_std_mean": 0.14750002324581146, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0016800262674223632, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016800262674223632, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.36068040132522583, "eval_signal/frontier_coverage_5/group_std_mean": 0.4406013935804367, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.005157729727216065, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.005157729727216065, "eval_steps_per_second": 0.189, "step": 250 }, { "calibration/aurc": 0.23505807779794913, "calibration/batch_distribution_entropy": 0.9013709907814249, "calibration/buffer_distribution_entropy": 0.9409831599796826, "calibration/confidence_entropy": 0.40754073413461833, "calibration/coverage@0%": 0.021875, "calibration/coverage@1%": 0.021875, "calibration/coverage@10%": 0.08359375, "calibration/coverage@15%": 0.207421875, "calibration/coverage@20%": 0.3484451443248532, "calibration/coverage@25%": 0.6675238502935421, "calibration/coverage@30%": 0.7984306201076321, "calibration/coverage@5%": 0.062890625, "calibration/ece": 0.13932734169649402, "calibration/mean_confidence": 0.5523449613856978, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 635.6, "completions/max_terminated_length": 635.6, "completions/mean_length": 222.9716796875, "completions/mean_terminated_length": 223.14452514648437, "completions/min_length": 22.2, "completions/min_terminated_length": 112.4, "epoch": 0.816, "grad_norm": 0.0009446038166061044, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 867019549.0, "reward": 0.9719715356826782, "reward_std": 0.07123408019542694, "rewards/accuracy_reward": 0.58046875, "rewards/brier_reward": 0.785218346118927, "rewards/confidence_uniqueness_reward": 0.9532184958457947, "rewards/format_reward": 0.99921875, "rewards/frontier_coverage_0": 0.09102783054113388, "rewards/frontier_coverage_1": 0.09102783054113388, "rewards/frontier_coverage_10": 0.08938535004854202, "rewards/frontier_coverage_15": 0.08647352084517479, "rewards/frontier_coverage_20": 0.07352498024702073, "rewards/frontier_coverage_25": 0.05705418214201927, "rewards/frontier_coverage_5": 0.09081372916698456, "signal/accuracy_reward/centered_abs_mean": 0.09835205078125, "signal/accuracy_reward/group_std_mean": 0.1273781567811966, "signal/accuracy_reward/group_zero_std_frac": 0.646875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.049176025390625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.049176025390625, "signal/advantage_abs_mean": 0.05432458594441414, "signal/advantage_pre_scale_abs_mean": 0.05432458594441414, "signal/advantage_pre_scale_std": 0.10411206483840943, "signal/advantage_std": 0.10411206483840943, "signal/brier_reward/centered_abs_mean": 0.12405794858932495, "signal/brier_reward/group_std_mean": 0.15806553959846498, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.012405795231461524, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012405795231461524, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021034000813961028, "signal/confidence_uniqueness_reward/group_std_mean": 0.02808857224881649, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021034001372754576, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021034001372754576, "signal/format_reward/centered_abs_mean": 0.00147705078125, "signal/format_reward/group_std_mean": 0.0036875875666737556, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000738525390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000738525390625, "signal/frontier_coverage_0/centered_abs_mean": 0.15168525874614716, "signal/frontier_coverage_0/group_std_mean": 0.19630924761295318, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00216909924056381, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00216909924056381, "signal/frontier_coverage_1/centered_abs_mean": 0.15168525874614716, "signal/frontier_coverage_1/group_std_mean": 0.19630924761295318, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00216909924056381, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00216909924056381, "signal/frontier_coverage_10/centered_abs_mean": 0.14648787081241607, "signal/frontier_coverage_10/group_std_mean": 0.1897138088941574, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.002094776462763548, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002094776462763548, "signal/frontier_coverage_15/centered_abs_mean": 0.1395682379603386, "signal/frontier_coverage_15/group_std_mean": 0.18085283041000366, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001995825790800154, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001995825790800154, "signal/frontier_coverage_20/centered_abs_mean": 0.10575809627771378, "signal/frontier_coverage_20/group_std_mean": 0.13796985149383545, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015123408054932951, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015123408054932951, "signal/frontier_coverage_25/centered_abs_mean": 0.06511625275015831, "signal/frontier_coverage_25/group_std_mean": 0.08438750207424164, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000931162410415709, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000931162410415709, "signal/frontier_coverage_5/centered_abs_mean": 0.15083783268928527, "signal/frontier_coverage_5/group_std_mean": 0.1952424615621567, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021569809876382353, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021569809876382353, "step": 255 }, { "calibration/aurc": 0.270215333264285, "calibration/batch_distribution_entropy": 0.9305754323811246, "calibration/buffer_distribution_entropy": 0.9413314910446944, "calibration/confidence_entropy": 0.4391170086291357, "calibration/coverage@0%": 0.044930283757338554, "calibration/coverage@1%": 0.06524278375733855, "calibration/coverage@10%": 0.22700587084148727, "calibration/coverage@15%": 0.29698125611545984, "calibration/coverage@20%": 0.33645731409001955, "calibration/coverage@25%": 0.4197628730430528, "calibration/coverage@30%": 0.5198783023483367, "calibration/coverage@5%": 0.18008653375733857, "calibration/ece": 0.1048740737634805, "calibration/mean_confidence": 0.49940577494659866, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00107421875, "completions/max_length": 799.0, "completions/max_terminated_length": 799.0, "completions/mean_length": 226.58212890625, "completions/mean_terminated_length": 226.82774963378907, "completions/min_length": 20.6, "completions/min_terminated_length": 109.0, "epoch": 0.832, "grad_norm": 0.000873431155923754, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 884348102.0, "reward": 0.9639191865921021, "reward_std": 0.0691076509654522, "rewards/accuracy_reward": 0.55126953125, "rewards/brier_reward": 0.8154711127281189, "rewards/confidence_uniqueness_reward": 0.9510850787162781, "rewards/format_reward": 0.998828125, "rewards/frontier_coverage_0": 0.14494312703609466, "rewards/frontier_coverage_1": 0.14494312703609466, "rewards/frontier_coverage_10": 0.13486984968185425, "rewards/frontier_coverage_15": 0.11929207742214203, "rewards/frontier_coverage_20": 0.09058420956134797, "rewards/frontier_coverage_25": 0.07505071610212326, "rewards/frontier_coverage_5": 0.1444932848215103, "signal/accuracy_reward/centered_abs_mean": 0.091925048828125, "signal/accuracy_reward/group_std_mean": 0.12147300839424133, "signal/accuracy_reward/group_zero_std_frac": 0.653125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0459625244140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0459625244140625, "signal/advantage_abs_mean": 0.05150103196501732, "signal/advantage_pre_scale_abs_mean": 0.05150103196501732, "signal/advantage_pre_scale_std": 0.10343928039073944, "signal/advantage_std": 0.10343928039073944, "signal/brier_reward/centered_abs_mean": 0.10446172952651978, "signal/brier_reward/group_std_mean": 0.13371139168739318, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010446173511445523, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010446173511445523, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02175750322639942, "signal/confidence_uniqueness_reward/group_std_mean": 0.030089304223656653, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021757503971457483, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021757503971457483, "signal/format_reward/centered_abs_mean": 0.0022216796875, "signal/format_reward/group_std_mean": 0.005560987768694759, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00111083984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00111083984375, "signal/frontier_coverage_0/centered_abs_mean": 0.1405455082654953, "signal/frontier_coverage_0/group_std_mean": 0.18188858330249785, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0020098007284104825, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020098007284104825, "signal/frontier_coverage_1/centered_abs_mean": 0.1405455082654953, "signal/frontier_coverage_1/group_std_mean": 0.18188858330249785, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0020098007284104825, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020098007284104825, "signal/frontier_coverage_10/centered_abs_mean": 0.12980564832687377, "signal/frontier_coverage_10/group_std_mean": 0.16831763684749604, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.001856220792979002, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001856220792979002, "signal/frontier_coverage_15/centered_abs_mean": 0.1126504197716713, "signal/frontier_coverage_15/group_std_mean": 0.14668649286031724, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016109010437503457, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016109010437503457, "signal/frontier_coverage_20/centered_abs_mean": 0.0767782062292099, "signal/frontier_coverage_20/group_std_mean": 0.10044772624969482, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0010979283368214964, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010979283368214964, "signal/frontier_coverage_25/centered_abs_mean": 0.05782742351293564, "signal/frontier_coverage_25/group_std_mean": 0.0740132749080658, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008269321522675454, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008269321522675454, "signal/frontier_coverage_5/centered_abs_mean": 0.13997873961925505, "signal/frontier_coverage_5/group_std_mean": 0.1811675101518631, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002001695986837149, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002001695986837149, "step": 260 }, { "calibration/aurc": 0.31580053039415606, "calibration/batch_distribution_entropy": 0.943326690831765, "calibration/buffer_distribution_entropy": 0.9552051158612647, "calibration/confidence_entropy": 0.46298817442665785, "calibration/coverage@0%": 0.040234375, "calibration/coverage@1%": 0.040234375, "calibration/coverage@10%": 0.187109375, "calibration/coverage@15%": 0.234765625, "calibration/coverage@20%": 0.415625, "calibration/coverage@25%": 0.49028963040275053, "calibration/coverage@30%": 0.5403586996561887, "calibration/coverage@5%": 0.110546875, "calibration/ece": 0.16090085641235513, "calibration/mean_confidence": 0.5592939710679985, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 719.6, "completions/max_terminated_length": 719.6, "completions/mean_length": 223.590625, "completions/mean_terminated_length": 223.80735473632814, "completions/min_length": 23.6, "completions/min_terminated_length": 108.6, "epoch": 0.848, "grad_norm": 0.0008446628926321864, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 901652038.0, "reward": 0.9539282083511352, "reward_std": 0.06537192910909653, "rewards/accuracy_reward": 0.5345703125, "rewards/brier_reward": 0.8037230730056762, "rewards/confidence_uniqueness_reward": 0.9539141178131103, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.13736073225736617, "rewards/frontier_coverage_1": 0.13736073225736617, "rewards/frontier_coverage_10": 0.12592306435108186, "rewards/frontier_coverage_15": 0.10868992656469345, "rewards/frontier_coverage_20": 0.08467617332935333, "rewards/frontier_coverage_25": 0.06507683843374253, "rewards/frontier_coverage_5": 0.1358505055308342, "signal/accuracy_reward/centered_abs_mean": 0.0785888671875, "signal/accuracy_reward/group_std_mean": 0.10885387361049652, "signal/accuracy_reward/group_zero_std_frac": 0.675, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.03929443359375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.03929443359375, "signal/advantage_abs_mean": 0.04755012765526771, "signal/advantage_pre_scale_abs_mean": 0.04755012765526771, "signal/advantage_pre_scale_std": 0.09628051966428756, "signal/advantage_std": 0.09628051966428756, "signal/brier_reward/centered_abs_mean": 0.1051436722278595, "signal/brier_reward/group_std_mean": 0.13542101085186004, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010514367558062076, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010514367558062076, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021031123772263526, "signal/confidence_uniqueness_reward/group_std_mean": 0.028871718794107437, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0021031123818829657, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021031123818829657, "signal/format_reward/centered_abs_mean": 0.0018798828125, "signal/format_reward/group_std_mean": 0.005187963135540485, "signal/format_reward/group_zero_std_frac": 0.971875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00093994140625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00093994140625, "signal/frontier_coverage_0/centered_abs_mean": 0.13309673368930816, "signal/frontier_coverage_0/group_std_mean": 0.17585844099521636, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019032832700759173, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019032832700759173, "signal/frontier_coverage_1/centered_abs_mean": 0.13309673368930816, "signal/frontier_coverage_1/group_std_mean": 0.17585844099521636, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019032832700759173, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019032832700759173, "signal/frontier_coverage_10/centered_abs_mean": 0.12269736975431442, "signal/frontier_coverage_10/group_std_mean": 0.16210621297359468, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017545723589137197, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017545723589137197, "signal/frontier_coverage_15/centered_abs_mean": 0.10646263808012009, "signal/frontier_coverage_15/group_std_mean": 0.14086052179336547, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015224156668409705, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015224156668409705, "signal/frontier_coverage_20/centered_abs_mean": 0.08161805719137191, "signal/frontier_coverage_20/group_std_mean": 0.10817753225564956, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011671381769701838, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011671381769701838, "signal/frontier_coverage_25/centered_abs_mean": 0.05570452064275742, "signal/frontier_coverage_25/group_std_mean": 0.07342620790004731, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007965746102854609, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007965746102854609, "signal/frontier_coverage_5/centered_abs_mean": 0.13184687048196791, "signal/frontier_coverage_5/group_std_mean": 0.1741828888654709, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018854103051126002, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018854103051126002, "step": 265 }, { "calibration/aurc": 0.26603775459386025, "calibration/batch_distribution_entropy": 0.9328918619130213, "calibration/buffer_distribution_entropy": 0.9612254715420541, "calibration/confidence_entropy": 0.47528040756113155, "calibration/coverage@0%": 0.034765625, "calibration/coverage@1%": 0.034765625, "calibration/coverage@10%": 0.15595110689823874, "calibration/coverage@15%": 0.24117080479452055, "calibration/coverage@20%": 0.33809243517612525, "calibration/coverage@25%": 0.4576481470156556, "calibration/coverage@30%": 0.5338490704500979, "calibration/coverage@5%": 0.04765625, "calibration/ece": 0.13823007154421446, "calibration/mean_confidence": 0.6021676149102527, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 696.2, "completions/max_terminated_length": 696.2, "completions/mean_length": 220.351953125, "completions/mean_terminated_length": 220.63258666992186, "completions/min_length": 0.0, "completions/min_terminated_length": 103.2, "epoch": 0.864, "grad_norm": 0.0010584808187559247, "learning_rate": 1e-06, "loss": -0.0011, "num_tokens": 918895258.0, "reward": 0.9751318335533142, "reward_std": 0.0712385781109333, "rewards/accuracy_reward": 0.5873046875, "rewards/brier_reward": 0.7925786375999451, "rewards/confidence_uniqueness_reward": 0.9517351508140564, "rewards/format_reward": 0.99873046875, "rewards/frontier_coverage_0": 0.0879151001572609, "rewards/frontier_coverage_1": 0.0879151001572609, "rewards/frontier_coverage_10": 0.08317596241831779, "rewards/frontier_coverage_15": 0.07355262599885463, "rewards/frontier_coverage_20": 0.060579386353492734, "rewards/frontier_coverage_25": 0.056345708668231964, "rewards/frontier_coverage_5": 0.08777875155210495, "signal/accuracy_reward/centered_abs_mean": 0.09532470703125, "signal/accuracy_reward/group_std_mean": 0.1278452306985855, "signal/accuracy_reward/group_zero_std_frac": 0.621875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.047662353515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.047662353515625, "signal/advantage_abs_mean": 0.052195066958665846, "signal/advantage_pre_scale_abs_mean": 0.052195066958665846, "signal/advantage_pre_scale_std": 0.10198588073253631, "signal/advantage_std": 0.10198588073253631, "signal/brier_reward/centered_abs_mean": 0.11365769803524017, "signal/brier_reward/group_std_mean": 0.14665003418922423, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011365770548582076, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011365770548582076, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022322241216897964, "signal/confidence_uniqueness_reward/group_std_mean": 0.031285477429628374, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022322241216897964, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022322241216897964, "signal/format_reward/centered_abs_mean": 0.002435302734375, "signal/format_reward/group_std_mean": 0.006508936360478401, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012176513671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012176513671875, "signal/frontier_coverage_0/centered_abs_mean": 0.14946494698524476, "signal/frontier_coverage_0/group_std_mean": 0.1939655214548111, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002137348777614534, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002137348777614534, "signal/frontier_coverage_1/centered_abs_mean": 0.14946494698524476, "signal/frontier_coverage_1/group_std_mean": 0.1939655214548111, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002137348777614534, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002137348777614534, "signal/frontier_coverage_10/centered_abs_mean": 0.13306838274002075, "signal/frontier_coverage_10/group_std_mean": 0.17288758158683776, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019028778653591872, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019028778653591872, "signal/frontier_coverage_15/centered_abs_mean": 0.11056115180253982, "signal/frontier_coverage_15/group_std_mean": 0.14380019903182983, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015810244716703893, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015810244716703893, "signal/frontier_coverage_20/centered_abs_mean": 0.08252616226673126, "signal/frontier_coverage_20/group_std_mean": 0.10731232017278672, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011801241431385278, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011801241431385278, "signal/frontier_coverage_25/centered_abs_mean": 0.06337658017873764, "signal/frontier_coverage_25/group_std_mean": 0.08188621997833252, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.000906285154633224, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.000906285154633224, "signal/frontier_coverage_5/centered_abs_mean": 0.14790982007980347, "signal/frontier_coverage_5/group_std_mean": 0.19197991490364075, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002115110377781093, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002115110377781093, "step": 270 }, { "calibration/aurc": 0.3599516688133769, "calibration/batch_distribution_entropy": 0.9434260369168366, "calibration/buffer_distribution_entropy": 0.9574774690861215, "calibration/confidence_entropy": 0.437518390452558, "calibration/coverage@0%": 0.005484059634127623, "calibration/coverage@1%": 0.005484059634127623, "calibration/coverage@10%": 0.05318469468266759, "calibration/coverage@15%": 0.11540664210889835, "calibration/coverage@20%": 0.16473415436380032, "calibration/coverage@25%": 0.25131654159951655, "calibration/coverage@30%": 0.33701887518706114, "calibration/coverage@5%": 0.005484059634127623, "calibration/ece": 0.1427166164579369, "calibration/mean_confidence": 0.5087113572934878, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00126953125, "completions/max_length": 853.8, "completions/max_terminated_length": 853.8, "completions/mean_length": 218.22265625, "completions/mean_terminated_length": 218.5016082763672, "completions/min_length": 0.0, "completions/min_terminated_length": 104.0, "epoch": 0.88, "grad_norm": 0.0008610005606897175, "learning_rate": 1e-06, "loss": -0.0013, "num_tokens": 936276930.0, "reward": 0.9432915210723877, "reward_std": 0.07173903733491897, "rewards/accuracy_reward": 0.51357421875, "rewards/brier_reward": 0.7973132848739624, "rewards/confidence_uniqueness_reward": 0.9522138953208923, "rewards/format_reward": 0.99873046875, "rewards/frontier_coverage_0": 0.1482342377305031, "rewards/frontier_coverage_1": 0.1482342377305031, "rewards/frontier_coverage_10": 0.13063293248414992, "rewards/frontier_coverage_15": 0.11058640480041504, "rewards/frontier_coverage_20": 0.09042486101388932, "rewards/frontier_coverage_25": 0.07815308347344399, "rewards/frontier_coverage_5": 0.1459364965558052, "signal/accuracy_reward/centered_abs_mean": 0.095428466796875, "signal/accuracy_reward/group_std_mean": 0.12478266805410385, "signal/accuracy_reward/group_zero_std_frac": 0.640625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0477142333984375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0477142333984375, "signal/advantage_abs_mean": 0.05423672944307327, "signal/advantage_pre_scale_abs_mean": 0.05423672944307327, "signal/advantage_pre_scale_std": 0.10489667057991028, "signal/advantage_std": 0.10489667057991028, "signal/brier_reward/centered_abs_mean": 0.11198111921548844, "signal/brier_reward/group_std_mean": 0.14444852769374847, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.011198111996054649, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011198111996054649, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022622523456811906, "signal/confidence_uniqueness_reward/group_std_mean": 0.03173264637589455, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022622523829340935, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022622523829340935, "signal/format_reward/centered_abs_mean": 0.002435302734375, "signal/format_reward/group_std_mean": 0.006508936267346143, "signal/format_reward/group_zero_std_frac": 0.965625, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0012176513671875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012176513671875, "signal/frontier_coverage_0/centered_abs_mean": 0.1513482302427292, "signal/frontier_coverage_0/group_std_mean": 0.1945643663406372, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0021642797160893678, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021642797160893678, "signal/frontier_coverage_1/centered_abs_mean": 0.1513482302427292, "signal/frontier_coverage_1/group_std_mean": 0.1945643663406372, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0021642797160893678, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021642797160893678, "signal/frontier_coverage_10/centered_abs_mean": 0.13286824375391007, "signal/frontier_coverage_10/group_std_mean": 0.1707235634326935, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019000159576535226, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019000159576535226, "signal/frontier_coverage_15/centered_abs_mean": 0.10618945211172104, "signal/frontier_coverage_15/group_std_mean": 0.13699231892824174, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015185092808678747, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015185092808678747, "signal/frontier_coverage_20/centered_abs_mean": 0.08019336313009262, "signal/frontier_coverage_20/group_std_mean": 0.1035075157880783, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011467650765553117, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011467650765553117, "signal/frontier_coverage_25/centered_abs_mean": 0.06250079050660133, "signal/frontier_coverage_25/group_std_mean": 0.08046629726886749, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008937613223679364, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008937613223679364, "signal/frontier_coverage_5/centered_abs_mean": 0.14934307038784028, "signal/frontier_coverage_5/group_std_mean": 0.191866672039032, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.002135605877265334, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002135605877265334, "step": 275 }, { "calibration/aurc": 0.36265407533836524, "calibration/batch_distribution_entropy": 0.9281281036058401, "calibration/buffer_distribution_entropy": 0.9562500495415044, "calibration/confidence_entropy": 0.4386896076321184, "calibration/coverage@0%": 0.015291047926019725, "calibration/coverage@1%": 0.015291047926019725, "calibration/coverage@10%": 0.05053767782798051, "calibration/coverage@15%": 0.12166972439660798, "calibration/coverage@20%": 0.1737108850389471, "calibration/coverage@25%": 0.23867640463719733, "calibration/coverage@30%": 0.3657895808152028, "calibration/coverage@5%": 0.04192401361229423, "calibration/ece": 0.1668563098098826, "calibration/mean_confidence": 0.5572778727670613, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 771.2, "completions/max_terminated_length": 771.2, "completions/mean_length": 219.6869140625, "completions/mean_terminated_length": 219.7939697265625, "completions/min_length": 44.0, "completions/min_terminated_length": 103.8, "epoch": 0.896, "grad_norm": 0.000931259011849761, "learning_rate": 1e-06, "loss": -0.0002, "num_tokens": 953637372.0, "reward": 0.9620537042617798, "reward_std": 0.062380281090736386, "rewards/accuracy_reward": 0.55, "rewards/brier_reward": 0.8034387230873108, "rewards/confidence_uniqueness_reward": 0.953917121887207, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.12949443906545638, "rewards/frontier_coverage_1": 0.12949443906545638, "rewards/frontier_coverage_10": 0.12721864879131317, "rewards/frontier_coverage_15": 0.11173846274614334, "rewards/frontier_coverage_20": 0.09186044484376907, "rewards/frontier_coverage_25": 0.09052028059959412, "rewards/frontier_coverage_5": 0.12822107076644898, "signal/accuracy_reward/centered_abs_mean": 0.083984375, "signal/accuracy_reward/group_std_mean": 0.11366891264915466, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0419921875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0419921875, "signal/advantage_abs_mean": 0.045716925710439685, "signal/advantage_pre_scale_abs_mean": 0.045716925710439685, "signal/advantage_pre_scale_std": 0.09269649535417557, "signal/advantage_std": 0.09269649535417557, "signal/brier_reward/centered_abs_mean": 0.10150657594203949, "signal/brier_reward/group_std_mean": 0.13047962486743928, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010150657780468465, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010150657780468465, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020040722191333772, "signal/confidence_uniqueness_reward/group_std_mean": 0.02639743983745575, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0020040722563862802, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020040722563862802, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_coverage_0/centered_abs_mean": 0.13849151730537415, "signal/frontier_coverage_0/group_std_mean": 0.1763758659362793, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019804287469014524, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019804287469014524, "signal/frontier_coverage_1/centered_abs_mean": 0.13849151730537415, "signal/frontier_coverage_1/group_std_mean": 0.1763758659362793, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019804287469014524, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019804287469014524, "signal/frontier_coverage_10/centered_abs_mean": 0.12867191731929778, "signal/frontier_coverage_10/group_std_mean": 0.16390889883041382, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018400083761662244, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018400083761662244, "signal/frontier_coverage_15/centered_abs_mean": 0.10785606354475022, "signal/frontier_coverage_15/group_std_mean": 0.1374804839491844, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015423417557030917, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015423417557030917, "signal/frontier_coverage_20/centered_abs_mean": 0.08022382259368896, "signal/frontier_coverage_20/group_std_mean": 0.10267434567213059, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011472006561234593, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011472006561234593, "signal/frontier_coverage_25/centered_abs_mean": 0.06498619243502617, "signal/frontier_coverage_25/group_std_mean": 0.08332770913839341, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009293025592342019, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009293025592342019, "signal/frontier_coverage_5/centered_abs_mean": 0.1374574899673462, "signal/frontier_coverage_5/group_std_mean": 0.17511171400547026, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019656420452520253, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019656420452520253, "step": 280 }, { "calibration/aurc": 0.37522512880027037, "calibration/batch_distribution_entropy": 0.9442782389923409, "calibration/buffer_distribution_entropy": 0.947399954454208, "calibration/confidence_entropy": 0.4650686166519636, "calibration/coverage@0%": 0.016413129892367907, "calibration/coverage@1%": 0.016413129892367907, "calibration/coverage@10%": 0.07937443248049807, "calibration/coverage@15%": 0.10954009480048402, "calibration/coverage@20%": 0.2910374784209907, "calibration/coverage@25%": 0.36547853654511975, "calibration/coverage@30%": 0.4070216998710808, "calibration/coverage@5%": 0.04242551894562234, "calibration/ece": 0.1624069213865474, "calibration/mean_confidence": 0.5441825835729568, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00087890625, "completions/max_length": 791.0, "completions/max_terminated_length": 791.0, "completions/mean_length": 218.5203125, "completions/mean_terminated_length": 218.70968627929688, "completions/min_length": 46.0, "completions/min_terminated_length": 108.0, "epoch": 0.912, "grad_norm": 0.0007959533832035959, "learning_rate": 1e-06, "loss": -0.0005, "num_tokens": 970926316.0, "reward": 0.9589624524116516, "reward_std": 0.06368861570954323, "rewards/accuracy_reward": 0.54443359375, "rewards/brier_reward": 0.8047209978103638, "rewards/confidence_uniqueness_reward": 0.9570178985595703, "rewards/format_reward": 0.99912109375, "rewards/frontier_coverage_0": 0.1231890469789505, "rewards/frontier_coverage_1": 0.1231890469789505, "rewards/frontier_coverage_10": 0.11918876320123672, "rewards/frontier_coverage_15": 0.11012653410434722, "rewards/frontier_coverage_20": 0.08735538721084594, "rewards/frontier_coverage_25": 0.0844956398010254, "rewards/frontier_coverage_5": 0.12247090861201286, "signal/accuracy_reward/centered_abs_mean": 0.077764892578125, "signal/accuracy_reward/group_std_mean": 0.10732522755861282, "signal/accuracy_reward/group_zero_std_frac": 0.678125, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0388824462890625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0388824462890625, "signal/advantage_abs_mean": 0.046620288491249086, "signal/advantage_pre_scale_abs_mean": 0.046620288491249086, "signal/advantage_pre_scale_std": 0.09408236593008042, "signal/advantage_std": 0.09408236593008042, "signal/brier_reward/centered_abs_mean": 0.10646310597658157, "signal/brier_reward/group_std_mean": 0.13785125315189362, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010646310821175575, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010646310821175575, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018950655311346053, "signal/confidence_uniqueness_reward/group_std_mean": 0.024912358820438386, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.001895065582357347, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001895065582357347, "signal/format_reward/centered_abs_mean": 0.001580810546875, "signal/format_reward/group_std_mean": 0.0033625274430960418, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007904052734375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007904052734375, "signal/frontier_coverage_0/centered_abs_mean": 0.1370942160487175, "signal/frontier_coverage_0/group_std_mean": 0.1766595095396042, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019604472909122705, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019604472909122705, "signal/frontier_coverage_1/centered_abs_mean": 0.1370942160487175, "signal/frontier_coverage_1/group_std_mean": 0.1766595095396042, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019604472909122705, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019604472909122705, "signal/frontier_coverage_10/centered_abs_mean": 0.13189242035150528, "signal/frontier_coverage_10/group_std_mean": 0.16979371905326843, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018860616255551577, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018860616255551577, "signal/frontier_coverage_15/centered_abs_mean": 0.11495250314474106, "signal/frontier_coverage_15/group_std_mean": 0.14815734326839447, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0016438208287581802, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016438208287581802, "signal/frontier_coverage_20/centered_abs_mean": 0.0817980095744133, "signal/frontier_coverage_20/group_std_mean": 0.10573563128709793, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001169711514376104, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001169711514376104, "signal/frontier_coverage_25/centered_abs_mean": 0.06745465323328972, "signal/frontier_coverage_25/group_std_mean": 0.0874060109257698, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009646015590988099, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009646015590988099, "signal/frontier_coverage_5/centered_abs_mean": 0.13618865460157395, "signal/frontier_coverage_5/group_std_mean": 0.17541859149932862, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019474976696074009, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019474976696074009, "step": 285 }, { "calibration/aurc": 0.4086159994676217, "calibration/batch_distribution_entropy": 0.9488851070088791, "calibration/buffer_distribution_entropy": 0.9612514103049214, "calibration/confidence_entropy": 0.46317632801857994, "calibration/coverage@0%": 0.0019546538649706457, "calibration/coverage@1%": 0.0019546538649706457, "calibration/coverage@10%": 0.0019546538649706457, "calibration/coverage@15%": 0.0019546538649706457, "calibration/coverage@20%": 0.030470278864970647, "calibration/coverage@25%": 0.1752140410958904, "calibration/coverage@30%": 0.23304718077299413, "calibration/coverage@5%": 0.0019546538649706457, "calibration/ece": 0.16270723826949696, "calibration/mean_confidence": 0.5144905505616594, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00068359375, "completions/max_length": 669.2, "completions/max_terminated_length": 669.2, "completions/mean_length": 212.41162109375, "completions/mean_terminated_length": 212.55691833496093, "completions/min_length": 22.6, "completions/min_terminated_length": 102.4, "epoch": 0.928, "grad_norm": 0.0006845975876785815, "learning_rate": 1e-06, "loss": -0.0006, "num_tokens": 988128227.0, "reward": 0.9499905347824097, "reward_std": 0.0637421689927578, "rewards/accuracy_reward": 0.53134765625, "rewards/brier_reward": 0.7886561274528503, "rewards/confidence_uniqueness_reward": 0.9511130928993226, "rewards/format_reward": 0.99921875, "rewards/frontier_coverage_0": 0.1233413815498352, "rewards/frontier_coverage_1": 0.1233413815498352, "rewards/frontier_coverage_10": 0.11664480417966842, "rewards/frontier_coverage_15": 0.10323808938264847, "rewards/frontier_coverage_20": 0.0839030459523201, "rewards/frontier_coverage_25": 0.07939638644456863, "rewards/frontier_coverage_5": 0.12051307708024979, "signal/accuracy_reward/centered_abs_mean": 0.076837158203125, "signal/accuracy_reward/group_std_mean": 0.1088681623339653, "signal/accuracy_reward/group_zero_std_frac": 0.6625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0384185791015625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0384185791015625, "signal/advantage_abs_mean": 0.0450009323656559, "signal/advantage_pre_scale_abs_mean": 0.0450009323656559, "signal/advantage_pre_scale_std": 0.0930885449051857, "signal/advantage_std": 0.0930885449051857, "signal/brier_reward/centered_abs_mean": 0.10286559611558914, "signal/brier_reward/group_std_mean": 0.13540334701538087, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010286559909582138, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010286559909582138, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022042611613869667, "signal/confidence_uniqueness_reward/group_std_mean": 0.02982432134449482, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0022042611613869666, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022042611613869666, "signal/format_reward/centered_abs_mean": 0.001513671875, "signal/format_reward/group_std_mean": 0.004419417260214687, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0007568359375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359375, "signal/frontier_coverage_0/centered_abs_mean": 0.13377356976270677, "signal/frontier_coverage_0/group_std_mean": 0.17625623643398286, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019129620399326087, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019129620399326087, "signal/frontier_coverage_1/centered_abs_mean": 0.13377356976270677, "signal/frontier_coverage_1/group_std_mean": 0.17625623643398286, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019129620399326087, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019129620399326087, "signal/frontier_coverage_10/centered_abs_mean": 0.12501855790615082, "signal/frontier_coverage_10/group_std_mean": 0.16456068456172943, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017877653473988176, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017877653473988176, "signal/frontier_coverage_15/centered_abs_mean": 0.10385439693927764, "signal/frontier_coverage_15/group_std_mean": 0.1367882251739502, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001485117874108255, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001485117874108255, "signal/frontier_coverage_20/centered_abs_mean": 0.07855610102415085, "signal/frontier_coverage_20/group_std_mean": 0.10324958562850953, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0011233522789552807, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011233522789552807, "signal/frontier_coverage_25/centered_abs_mean": 0.06524143964052201, "signal/frontier_coverage_25/group_std_mean": 0.08521927446126938, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009329525521025062, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009329525521025062, "signal/frontier_coverage_5/centered_abs_mean": 0.13079718947410585, "signal/frontier_coverage_5/group_std_mean": 0.1722914159297943, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018703997833654285, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018703997833654285, "step": 290 }, { "calibration/aurc": 0.23838562616966347, "calibration/batch_distribution_entropy": 0.9505648955577994, "calibration/buffer_distribution_entropy": 0.9718941061132969, "calibration/confidence_entropy": 0.46715676252481525, "calibration/coverage@0%": 0.04140625, "calibration/coverage@1%": 0.04140625, "calibration/coverage@10%": 0.241015625, "calibration/coverage@15%": 0.339453125, "calibration/coverage@20%": 0.458984375, "calibration/coverage@25%": 0.552734375, "calibration/coverage@30%": 0.63515625, "calibration/coverage@5%": 0.137109375, "calibration/ece": 0.09940677972037605, "calibration/mean_confidence": 0.531384959032291, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00048828125, "completions/max_length": 771.4, "completions/max_terminated_length": 771.4, "completions/mean_length": 212.0873046875, "completions/mean_terminated_length": 212.19146118164062, "completions/min_length": 42.6, "completions/min_terminated_length": 104.6, "epoch": 0.944, "grad_norm": 0.0008118631085380912, "learning_rate": 1e-06, "loss": -0.0001, "num_tokens": 1005275425.0, "reward": 0.9531220674514771, "reward_std": 0.06911338046193123, "rewards/accuracy_reward": 0.5326171875, "rewards/brier_reward": 0.7961865067481995, "rewards/confidence_uniqueness_reward": 0.9509644865989685, "rewards/format_reward": 0.99951171875, "rewards/frontier_coverage_0": 0.13934872150421143, "rewards/frontier_coverage_1": 0.13934872150421143, "rewards/frontier_coverage_10": 0.13322099447250366, "rewards/frontier_coverage_15": 0.12536731064319612, "rewards/frontier_coverage_20": 0.10790151357650757, "rewards/frontier_coverage_25": 0.07890170142054558, "rewards/frontier_coverage_5": 0.13902259171009063, "signal/accuracy_reward/centered_abs_mean": 0.09739990234375, "signal/accuracy_reward/group_std_mean": 0.1297285944223404, "signal/accuracy_reward/group_zero_std_frac": 0.61875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.048699951171875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.048699951171875, "signal/advantage_abs_mean": 0.051316916942596436, "signal/advantage_pre_scale_abs_mean": 0.051316916942596436, "signal/advantage_pre_scale_std": 0.1007079765200615, "signal/advantage_std": 0.1007079765200615, "signal/brier_reward/centered_abs_mean": 0.10415657460689545, "signal/brier_reward/group_std_mean": 0.1357142448425293, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010415657423436642, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010415657423436642, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02197747528553009, "signal/confidence_uniqueness_reward/group_std_mean": 0.028798850253224373, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.002197747630998492, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002197747630998492, "signal/format_reward/centered_abs_mean": 0.000946044921875, "signal/format_reward/group_std_mean": 0.0027621358167380095, "signal/format_reward/group_zero_std_frac": 0.984375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0004730224609375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004730224609375, "signal/frontier_coverage_0/centered_abs_mean": 0.14225318431854247, "signal/frontier_coverage_0/group_std_mean": 0.18444684743881226, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002034220518544316, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002034220518544316, "signal/frontier_coverage_1/centered_abs_mean": 0.14225318431854247, "signal/frontier_coverage_1/group_std_mean": 0.18444684743881226, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002034220518544316, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002034220518544316, "signal/frontier_coverage_10/centered_abs_mean": 0.13472193479537964, "signal/frontier_coverage_10/group_std_mean": 0.1746243953704834, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019265236100181938, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019265236100181938, "signal/frontier_coverage_15/centered_abs_mean": 0.12378777861595154, "signal/frontier_coverage_15/group_std_mean": 0.16058792769908906, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001770165259949863, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001770165259949863, "signal/frontier_coverage_20/centered_abs_mean": 0.10432201772928237, "signal/frontier_coverage_20/group_std_mean": 0.13533593565225602, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014918048167601229, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014918048167601229, "signal/frontier_coverage_25/centered_abs_mean": 0.06784244105219842, "signal/frontier_coverage_25/group_std_mean": 0.08792800456285477, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009701469331048429, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009701469331048429, "signal/frontier_coverage_5/centered_abs_mean": 0.14184999465942383, "signal/frontier_coverage_5/group_std_mean": 0.18391945362091064, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0020284549333155154, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020284549333155154, "step": 295 }, { "calibration/aurc": 0.341505781144742, "calibration/batch_distribution_entropy": 0.9222308232707069, "calibration/buffer_distribution_entropy": 0.9632253904748493, "calibration/confidence_entropy": 0.4160752706670393, "calibration/coverage@0%": 0.0046875, "calibration/coverage@1%": 0.0046875, "calibration/coverage@10%": 0.06015625, "calibration/coverage@15%": 0.2171875, "calibration/coverage@20%": 0.29375, "calibration/coverage@25%": 0.33515625, "calibration/coverage@30%": 0.380859375, "calibration/coverage@5%": 0.026953125, "calibration/ece": 0.16675665989188038, "calibration/mean_confidence": 0.5437287107428197, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009765625, "completions/max_length": 787.6, "completions/max_terminated_length": 787.6, "completions/mean_length": 212.77939453125, "completions/mean_terminated_length": 212.98646240234376, "completions/min_length": 21.4, "completions/min_terminated_length": 101.0, "epoch": 0.96, "grad_norm": 0.000677246309351176, "learning_rate": 1e-06, "loss": -0.0008, "num_tokens": 1022394606.0, "reward": 0.9520639538764953, "reward_std": 0.05611773431301117, "rewards/accuracy_reward": 0.52470703125, "rewards/brier_reward": 0.8068634748458863, "rewards/confidence_uniqueness_reward": 0.9482045531272888, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.163878333568573, "rewards/frontier_coverage_1": 0.163878333568573, "rewards/frontier_coverage_10": 0.15686869621276855, "rewards/frontier_coverage_15": 0.14912986606359482, "rewards/frontier_coverage_20": 0.1306234270334244, "rewards/frontier_coverage_25": 0.10091332048177719, "rewards/frontier_coverage_5": 0.162114617228508, "signal/accuracy_reward/centered_abs_mean": 0.071954345703125, "signal/accuracy_reward/group_std_mean": 0.09751666337251663, "signal/accuracy_reward/group_zero_std_frac": 0.715625, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0359771728515625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0359771728515625, "signal/advantage_abs_mean": 0.04099631011486053, "signal/advantage_pre_scale_abs_mean": 0.04099631011486053, "signal/advantage_pre_scale_std": 0.08593605160713196, "signal/advantage_std": 0.08593605160713196, "signal/brier_reward/centered_abs_mean": 0.09962098300457001, "signal/brier_reward/group_std_mean": 0.13012734651565552, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.00996209867298603, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.00996209867298603, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02344542071223259, "signal/confidence_uniqueness_reward/group_std_mean": 0.03061012886464596, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0023445420898497104, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023445420898497104, "signal/format_reward/centered_abs_mean": 0.00177001953125, "signal/format_reward/group_std_mean": 0.003914954606443644, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000885009765625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000885009765625, "signal/frontier_coverage_0/centered_abs_mean": 0.1334471195936203, "signal/frontier_coverage_0/group_std_mean": 0.17524456679821016, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019082937389612198, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019082937389612198, "signal/frontier_coverage_1/centered_abs_mean": 0.1334471195936203, "signal/frontier_coverage_1/group_std_mean": 0.17524456679821016, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019082937389612198, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019082937389612198, "signal/frontier_coverage_10/centered_abs_mean": 0.1283472567796707, "signal/frontier_coverage_10/group_std_mean": 0.16842811405658722, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0018353657331317663, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018353657331317663, "signal/frontier_coverage_15/centered_abs_mean": 0.1224765032529831, "signal/frontier_coverage_15/group_std_mean": 0.16059996783733368, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001751414081081748, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001751414081081748, "signal/frontier_coverage_20/centered_abs_mean": 0.10159540325403213, "signal/frontier_coverage_20/group_std_mean": 0.13351670205593108, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0014528142288327216, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014528142288327216, "signal/frontier_coverage_25/centered_abs_mean": 0.07035883218050003, "signal/frontier_coverage_25/group_std_mean": 0.09163796603679657, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0010061312816105783, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010061312816105783, "signal/frontier_coverage_5/centered_abs_mean": 0.13208072930574416, "signal/frontier_coverage_5/group_std_mean": 0.17346138060092925, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018887544283643365, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018887544283643365, "step": 300 }, { "epoch": 0.96, "eval_calibration/aurc": 0.4217691397090048, "eval_calibration/batch_distribution_entropy": 0.8610313854852217, "eval_calibration/buffer_distribution_entropy": 0.9560461371941351, "eval_calibration/confidence_entropy": 0.434464526008027, "eval_calibration/coverage@0%": 0.0859375, "eval_calibration/coverage@1%": 0.0859375, "eval_calibration/coverage@10%": 0.0859375, "eval_calibration/coverage@15%": 0.0859375, "eval_calibration/coverage@20%": 0.171875, "eval_calibration/coverage@25%": 0.1953125, "eval_calibration/coverage@30%": 0.3828125, "eval_calibration/coverage@5%": 0.0859375, "eval_calibration/ece": 0.15024806293485138, "eval_calibration/mean_confidence": 0.4473162753004427, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 586.75, "eval_completions/max_terminated_length": 586.75, "eval_completions/mean_length": 216.80401229858398, "eval_completions/mean_terminated_length": 216.80401229858398, "eval_completions/min_length": 123.0, "eval_completions/min_terminated_length": 123.0, "eval_loss": 0.0, "eval_num_tokens": 1022394606.0, "eval_reward": 0.9061934798955917, "eval_reward_std": 0.2254936397075653, "eval_rewards/accuracy_reward": 0.423828125, "eval_rewards/brier_reward": 0.8254795223474503, "eval_rewards/confidence_uniqueness_reward": 0.892333984375, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.2570565640926361, "eval_rewards/frontier_coverage_1": 0.2570565640926361, "eval_rewards/frontier_coverage_10": 0.2536317780613899, "eval_rewards/frontier_coverage_15": 0.24184846132993698, "eval_rewards/frontier_coverage_20": 0.19645333662629128, "eval_rewards/frontier_coverage_25": 0.11186533235013485, "eval_rewards/frontier_coverage_5": 0.25538118183612823, "eval_runtime": 26.4029, "eval_samples_per_second": 18.937, "eval_signal/accuracy_reward/centered_abs_mean": 0.4708251953125, "eval_signal/accuracy_reward/group_std_mean": 0.4925679340958595, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.23541259765625, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23541259765625, "eval_signal/advantage_abs_mean": 0.21049191057682037, "eval_signal/advantage_pre_scale_abs_mean": 0.21049191057682037, "eval_signal/advantage_pre_scale_std": 0.22300851345062256, "eval_signal/advantage_std": 0.22300851345062256, "eval_signal/brier_reward/centered_abs_mean": 0.18114928901195526, "eval_signal/brier_reward/group_std_mean": 0.23810456693172455, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.01811492955312133, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01811492955312133, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0526275634765625, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06362566910684109, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.005262756545562297, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005262756545562297, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.39571524411439896, "eval_signal/frontier_coverage_0/group_std_mean": 0.4655715748667717, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.005658728186972439, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.005658728186972439, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.39571524411439896, "eval_signal/frontier_coverage_1/group_std_mean": 0.4655715748667717, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.005658728186972439, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.005658728186972439, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.38869649171829224, "eval_signal/frontier_coverage_10/group_std_mean": 0.45751991868019104, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0055583600187674165, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0055583600187674165, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.37286993116140366, "eval_signal/frontier_coverage_15/group_std_mean": 0.4395202621817589, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005332039901986718, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.005332039901986718, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2987586036324501, "eval_signal/frontier_coverage_20/group_std_mean": 0.35560180246829987, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004272247897461057, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004272247897461057, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.15963854268193245, "eval_signal/frontier_coverage_25/group_std_mean": 0.19564306735992432, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00228283106116578, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00228283106116578, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.39209768921136856, "eval_signal/frontier_coverage_5/group_std_mean": 0.46143699437379837, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00560699705965817, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00560699705965817, "eval_steps_per_second": 0.151, "step": 300 }, { "calibration/aurc": 0.26373361579535665, "calibration/batch_distribution_entropy": 0.9421094436183927, "calibration/buffer_distribution_entropy": 0.9528965422674689, "calibration/confidence_entropy": 0.4349062223781308, "calibration/coverage@0%": 0.038671875, "calibration/coverage@1%": 0.038671875, "calibration/coverage@10%": 0.225390625, "calibration/coverage@15%": 0.34921875, "calibration/coverage@20%": 0.4703125, "calibration/coverage@25%": 0.51953125, "calibration/coverage@30%": 0.6046875, "calibration/coverage@5%": 0.12890625, "calibration/ece": 0.17938745999152064, "calibration/mean_confidence": 0.47612366714428206, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00078125, "completions/max_length": 1079.0, "completions/max_terminated_length": 1079.0, "completions/mean_length": 218.6193359375, "completions/mean_terminated_length": 218.78925476074218, "completions/min_length": 20.2, "completions/min_terminated_length": 105.4, "epoch": 0.976, "grad_norm": 0.0011953199282288551, "learning_rate": 1e-06, "loss": -0.0004, "num_tokens": 1039494388.0, "reward": 0.9646060228347778, "reward_std": 0.06057727336883545, "rewards/accuracy_reward": 0.5572265625, "rewards/brier_reward": 0.7965489983558655, "rewards/confidence_uniqueness_reward": 0.9446879029273987, "rewards/format_reward": 0.99921875, "rewards/frontier_coverage_0": 0.13621663516387345, "rewards/frontier_coverage_1": 0.13621663516387345, "rewards/frontier_coverage_10": 0.13521524909883736, "rewards/frontier_coverage_15": 0.123319979198277, "rewards/frontier_coverage_20": 0.10419662147760392, "rewards/frontier_coverage_25": 0.0865128442645073, "rewards/frontier_coverage_5": 0.13564201332628728, "signal/accuracy_reward/centered_abs_mean": 0.08521728515625, "signal/accuracy_reward/group_std_mean": 0.11445611119270324, "signal/accuracy_reward/group_zero_std_frac": 0.66875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.042608642578125, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.042608642578125, "signal/advantage_abs_mean": 0.0442943774163723, "signal/advantage_pre_scale_abs_mean": 0.0442943774163723, "signal/advantage_pre_scale_std": 0.08979521989822388, "signal/advantage_std": 0.08979521989822388, "signal/brier_reward/centered_abs_mean": 0.10064524412155151, "signal/brier_reward/group_std_mean": 0.13214921504259108, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010064524598419666, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010064524598419666, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025105124711990355, "signal/confidence_uniqueness_reward/group_std_mean": 0.03264738321304321, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.00251051252707839, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00251051252707839, "signal/format_reward/centered_abs_mean": 0.00147705078125, "signal/format_reward/group_std_mean": 0.003687587613239884, "signal/format_reward/group_zero_std_frac": 0.98125, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000738525390625, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000738525390625, "signal/frontier_coverage_0/centered_abs_mean": 0.15215515047311784, "signal/frontier_coverage_0/group_std_mean": 0.19931451976299286, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00217581856995821, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00217581856995821, "signal/frontier_coverage_1/centered_abs_mean": 0.15215515047311784, "signal/frontier_coverage_1/group_std_mean": 0.19931451976299286, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00217581856995821, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00217581856995821, "signal/frontier_coverage_10/centered_abs_mean": 0.14945854544639586, "signal/frontier_coverage_10/group_std_mean": 0.19589938819408417, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0021372571820393203, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021372571820393203, "signal/frontier_coverage_15/centered_abs_mean": 0.13901536613702775, "signal/frontier_coverage_15/group_std_mean": 0.18246963918209075, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.001987919630482793, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001987919630482793, "signal/frontier_coverage_20/centered_abs_mean": 0.09175378829240799, "signal/frontier_coverage_20/group_std_mean": 0.12028330266475677, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001312079164199531, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001312079164199531, "signal/frontier_coverage_25/centered_abs_mean": 0.06841867417097092, "signal/frontier_coverage_25/group_std_mean": 0.0888764038681984, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0009783869958482684, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009783869958482684, "signal/frontier_coverage_5/centered_abs_mean": 0.15160029977560044, "signal/frontier_coverage_5/group_std_mean": 0.19860751628875734, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021678843069821594, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021678843069821594, "step": 305 }, { "calibration/aurc": 0.3631079336430432, "calibration/batch_distribution_entropy": 0.8903107972897141, "calibration/buffer_distribution_entropy": 0.9423624956881109, "calibration/confidence_entropy": 0.38868585708804126, "calibration/coverage@0%": 0.009375, "calibration/coverage@1%": 0.009375, "calibration/coverage@10%": 0.06881803449119374, "calibration/coverage@15%": 0.0981240826810176, "calibration/coverage@20%": 0.24775562622309194, "calibration/coverage@25%": 0.2883920927103718, "calibration/coverage@30%": 0.42278238136007823, "calibration/coverage@5%": 0.034804611056751464, "calibration/ece": 0.16911921169999553, "calibration/mean_confidence": 0.4151368878784917, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0001953125, "completions/max_length": 615.4, "completions/max_terminated_length": 615.4, "completions/mean_length": 218.641796875, "completions/mean_terminated_length": 218.68403015136718, "completions/min_length": 62.8, "completions/min_terminated_length": 105.8, "epoch": 0.992, "grad_norm": 0.0007313250098377466, "learning_rate": 1e-06, "loss": 0.0003, "num_tokens": 1056861760.0, "reward": 0.9498269557952881, "reward_std": 0.05522818714380264, "rewards/accuracy_reward": 0.52666015625, "rewards/brier_reward": 0.7941903471946716, "rewards/confidence_uniqueness_reward": 0.9336981177330017, "rewards/format_reward": 0.9998046875, "rewards/frontier_coverage_0": 0.16115660667419435, "rewards/frontier_coverage_1": 0.16115660667419435, "rewards/frontier_coverage_10": 0.1574849307537079, "rewards/frontier_coverage_15": 0.14156938940286637, "rewards/frontier_coverage_20": 0.09960801899433136, "rewards/frontier_coverage_25": 0.08336942940950394, "rewards/frontier_coverage_5": 0.16108837127685546, "signal/accuracy_reward/centered_abs_mean": 0.076116943359375, "signal/accuracy_reward/group_std_mean": 0.10370298027992249, "signal/accuracy_reward/group_zero_std_frac": 0.69375, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0380584716796875, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0380584716796875, "signal/advantage_abs_mean": 0.04109043329954147, "signal/advantage_pre_scale_abs_mean": 0.04109043329954147, "signal/advantage_pre_scale_std": 0.0837163046002388, "signal/advantage_std": 0.0837163046002388, "signal/brier_reward/centered_abs_mean": 0.10567554533481598, "signal/brier_reward/group_std_mean": 0.13649180233478547, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010567554831504821, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010567554831504821, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02967868894338608, "signal/confidence_uniqueness_reward/group_std_mean": 0.038240250945091245, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0029678690247237683, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029678690247237683, "signal/format_reward/centered_abs_mean": 0.00037841796875, "signal/format_reward/group_std_mean": 0.0011048543266952038, "signal/format_reward/group_zero_std_frac": 0.99375, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000189208984375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000189208984375, "signal/frontier_coverage_0/centered_abs_mean": 0.15005522966384888, "signal/frontier_coverage_0/group_std_mean": 0.19505343437194825, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.002145789796486497, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002145789796486497, "signal/frontier_coverage_1/centered_abs_mean": 0.15005522966384888, "signal/frontier_coverage_1/group_std_mean": 0.19505343437194825, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.002145789796486497, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002145789796486497, "signal/frontier_coverage_10/centered_abs_mean": 0.1445154994726181, "signal/frontier_coverage_10/group_std_mean": 0.18783792853355408, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0020665716379880904, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0020665716379880904, "signal/frontier_coverage_15/centered_abs_mean": 0.12960256338119508, "signal/frontier_coverage_15/group_std_mean": 0.16830175668001174, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0018533166265115141, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018533166265115141, "signal/frontier_coverage_20/centered_abs_mean": 0.09365049600601197, "signal/frontier_coverage_20/group_std_mean": 0.12195496857166291, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013392021879553794, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013392021879553794, "signal/frontier_coverage_25/centered_abs_mean": 0.059622716158628464, "signal/frontier_coverage_25/group_std_mean": 0.0775347501039505, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0008526048739440739, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008526048739440739, "signal/frontier_coverage_5/centered_abs_mean": 0.14962632954120636, "signal/frontier_coverage_5/group_std_mean": 0.19450531899929047, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0021396565716713667, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021396565716713667, "step": 310 }, { "calibration/aurc": 0.28396650781922805, "calibration/batch_distribution_entropy": 0.9298624640673321, "calibration/buffer_distribution_entropy": 0.9416951559070315, "calibration/confidence_entropy": 0.42543707865259917, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.10546875, "calibration/coverage@15%": 0.1123046875, "calibration/coverage@20%": 0.177734375, "calibration/coverage@25%": 0.52734375, "calibration/coverage@30%": 0.640625, "calibration/coverage@5%": 0.0, "calibration/ece": 0.17469015186957876, "calibration/mean_confidence": 0.5968461396947611, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0011260363520408379, "completions/max_length": 547.0, "completions/max_terminated_length": 547.0, "completions/mean_length": 219.1380844116211, "completions/mean_terminated_length": 219.38528442382812, "completions/min_length": 0.0, "completions/min_terminated_length": 107.0, "epoch": 0.9984, "num_tokens": 1063761617.0, "reward": 0.9532146751880646, "reward_std": 0.0626951027661562, "rewards/accuracy_reward": 0.5458984375, "rewards/brier_reward": 0.7678532898426056, "rewards/confidence_uniqueness_reward": 0.9525276124477386, "rewards/format_reward": 0.9990234375, "rewards/frontier_coverage_0": 0.10089538991451263, "rewards/frontier_coverage_1": 0.10089538991451263, "rewards/frontier_coverage_10": 0.0899675115942955, "rewards/frontier_coverage_15": 0.07815195806324482, "rewards/frontier_coverage_20": 0.07496267184615135, "rewards/frontier_coverage_25": 0.0691562332212925, "rewards/frontier_coverage_5": 0.09545820578932762, "signal/accuracy_reward/centered_abs_mean": 0.078643798828125, "signal/accuracy_reward/group_std_mean": 0.10699028894305229, "signal/accuracy_reward/group_zero_std_frac": 0.6796875, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.0393218994140625, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0393218994140625, "signal/advantage_abs_mean": 0.04600895382463932, "signal/advantage_pre_scale_abs_mean": 0.04600895382463932, "signal/advantage_pre_scale_std": 0.09656216576695442, "signal/advantage_std": 0.09656216576695442, "signal/brier_reward/centered_abs_mean": 0.1068677231669426, "signal/brier_reward/group_std_mean": 0.13664086163043976, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.010686772409826517, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010686772409826517, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020281177014112473, "signal/confidence_uniqueness_reward/group_std_mean": 0.0280290599912405, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0020281175966374576, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020281175966374576, "signal/format_reward/centered_abs_mean": 0.00189208984375, "signal/format_reward/group_std_mean": 0.005524271633476019, "signal/format_reward/group_zero_std_frac": 0.96875, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.000946044921875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000946044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.13520820438861847, "signal/frontier_coverage_0/group_std_mean": 0.17727045714855194, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019334774115122855, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019334774115122855, "signal/frontier_coverage_1/centered_abs_mean": 0.13520820438861847, "signal/frontier_coverage_1/group_std_mean": 0.17727045714855194, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019334774115122855, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019334774115122855, "signal/frontier_coverage_10/centered_abs_mean": 0.12330496311187744, "signal/frontier_coverage_10/group_std_mean": 0.16166674345731735, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0017632609815336764, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017632609815336764, "signal/frontier_coverage_15/centered_abs_mean": 0.10612555965781212, "signal/frontier_coverage_15/group_std_mean": 0.13906469196081161, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015175954904407263, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015175954904407263, "signal/frontier_coverage_20/centered_abs_mean": 0.08115751668810844, "signal/frontier_coverage_20/group_std_mean": 0.10668066889047623, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.001160552492365241, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001160552492365241, "signal/frontier_coverage_25/centered_abs_mean": 0.05299645476043224, "signal/frontier_coverage_25/group_std_mean": 0.06990226730704308, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0007578493095934391, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0007578493095934391, "signal/frontier_coverage_5/centered_abs_mean": 0.13076359033584595, "signal/frontier_coverage_5/group_std_mean": 0.1714838668704033, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0018699192441999912, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018699192441999912, "step": 312, "total_flos": 0.0, "train_loss": -0.0002521653109280846, "train_runtime": 61370.3515, "train_samples_per_second": 0.326, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 1063761617, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }