{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.499193760077999, "eval_steps": 50, "global_step": 1040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5229855849935021, "calibration/batch_distribution_entropy": 0.28178869591509825, "calibration/confidence_entropy": 0.22202290431332766, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.47728606089979236, "calibration/mean_confidence": 0.9145457949931112, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019357638888888907, "completions/max_length": 3991.8, "completions/max_terminated_length": 3991.8, "completions/mean_length": 516.1110229492188, "completions/mean_terminated_length": 526.2912231445313, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.010887011885643005, "learning_rate": 1.201923076923077e-07, "loss": 0.0077, "num_tokens": 9059807.0, "reward": 0.4539343655109406, "reward_std": 0.4154787003993988, "rewards/accuracy_reward": 0.2559895753860474, "rewards/brier_reward": 0.3074508547782898, "rewards/confidence_uniqueness_reward": 0.285185831785202, "rewards/format_reward": 0.5927951335906982, "rewards/frontier_coverage_0": 0.2700425565242767, "rewards/frontier_coverage_1": 0.2700425565242767, "rewards/frontier_coverage_10": 0.2700425565242767, "rewards/frontier_coverage_15": 0.2700425565242767, "rewards/frontier_coverage_20": 0.2700425565242767, "rewards/frontier_coverage_25": 0.2700425565242767, "rewards/frontier_coverage_5": 0.2700425565242767, "rewards/frontier_entropy_batch_reward": -0.5675290942192077, "signal/accuracy_reward/centered_abs_mean": 0.3038682699203491, "signal/accuracy_reward/group_std_mean": 0.3629651963710785, "signal/accuracy_reward/group_zero_std_frac": 0.1027777798473835, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3931264102458954, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15193413496017455, "signal/advantage_abs_mean": 0.8550266146659851, "signal/advantage_pre_scale_abs_mean": 0.35794793963432314, "signal/advantage_pre_scale_std": 0.42069290280342103, "signal/advantage_std": 0.9842098474502563, "signal/brier_reward/centered_abs_mean": 0.31524649262428284, "signal/brier_reward/group_std_mean": 0.368165111541748, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08156516402959824, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.031524648517370225, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23518760204315187, "signal/confidence_uniqueness_reward/group_std_mean": 0.2871262729167938, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060857976973056796, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02351876050233841, "signal/format_reward/centered_abs_mean": 0.4417480409145355, "signal/format_reward/group_std_mean": 0.4756620943546295, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5716474652290344, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.22087402045726776, "signal/frontier_coverage_0/centered_abs_mean": 0.3050322890281677, "signal/frontier_coverage_0/group_std_mean": 0.36244935989379884, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011285928264260291, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004361961875110865, "signal/frontier_coverage_1/centered_abs_mean": 0.3050322890281677, "signal/frontier_coverage_1/group_std_mean": 0.36244935989379884, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011285928264260291, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004361961875110865, "signal/frontier_coverage_10/centered_abs_mean": 0.3050322890281677, "signal/frontier_coverage_10/group_std_mean": 0.36244935989379884, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011285928264260291, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004361961875110865, "signal/frontier_coverage_15/centered_abs_mean": 0.3050322890281677, "signal/frontier_coverage_15/group_std_mean": 0.36244935989379884, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011285928264260291, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004361961875110865, "signal/frontier_coverage_20/centered_abs_mean": 0.3050322890281677, "signal/frontier_coverage_20/group_std_mean": 0.36244935989379884, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011285928264260291, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004361961875110865, "signal/frontier_coverage_25/centered_abs_mean": 0.3050322890281677, "signal/frontier_coverage_25/group_std_mean": 0.36244935989379884, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.011285928264260291, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004361961875110865, "signal/frontier_coverage_5/centered_abs_mean": 0.3050322890281677, "signal/frontier_coverage_5/group_std_mean": 0.36244935989379884, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.011285928264260291, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004361961875110865, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.45104606747627257, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4828143179416656, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11673455983400345, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.045104607939720154, "step": 5 }, { "calibration/aurc": 0.4756377973542508, "calibration/batch_distribution_entropy": 0.25095197717333295, "calibration/confidence_entropy": 0.22121285056009152, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.43047562493454417, "calibration/mean_confidence": 0.9208003461593043, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018576388888888885, "completions/max_length": 3964.6, "completions/max_terminated_length": 3964.6, "completions/mean_length": 506.35322265625, "completions/mean_terminated_length": 515.9827209472656, "completions/min_length": 0.0, "completions/min_terminated_length": 3.8, "epoch": 0.023999700003749954, "grad_norm": 0.011354845017194748, "learning_rate": 2.403846153846154e-07, "loss": 0.0094, "num_tokens": 17975716.0, "reward": 0.4697346866130829, "reward_std": 0.4147303819656372, "rewards/accuracy_reward": 0.26328125, "rewards/brier_reward": 0.31500027775764466, "rewards/confidence_uniqueness_reward": 0.301408588886261, "rewards/format_reward": 0.6149305582046509, "rewards/frontier_coverage_0": 0.27632899284362794, "rewards/frontier_coverage_1": 0.27632899284362794, "rewards/frontier_coverage_10": 0.27632899284362794, "rewards/frontier_coverage_15": 0.27632899284362794, "rewards/frontier_coverage_20": 0.27632899284362794, "rewards/frontier_coverage_25": 0.27632899284362794, "rewards/frontier_coverage_5": 0.27632899284362794, "rewards/frontier_entropy_batch_reward": -0.5867263197898864, "signal/accuracy_reward/centered_abs_mean": 0.31220160722732543, "signal/accuracy_reward/group_std_mean": 0.3712098479270935, "signal/accuracy_reward/group_zero_std_frac": 0.09166666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.40598496198654177, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15610080361366271, "signal/advantage_abs_mean": 0.8426825761795044, "signal/advantage_pre_scale_abs_mean": 0.35328022241592405, "signal/advantage_pre_scale_std": 0.41959584355354307, "signal/advantage_std": 0.9842082262039185, "signal/brier_reward/centered_abs_mean": 0.3161942720413208, "signal/brier_reward/group_std_mean": 0.3705894351005554, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0822685867547989, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.031619428843259814, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23270548582077027, "signal/confidence_uniqueness_reward/group_std_mean": 0.28428863286972045, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06063591316342354, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02327054850757122, "signal/format_reward/centered_abs_mean": 0.4267469644546509, "signal/format_reward/group_std_mean": 0.46675443053245547, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5556710243225098, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21337348222732544, "signal/frontier_coverage_0/centered_abs_mean": 0.30924100875854493, "signal/frontier_coverage_0/group_std_mean": 0.3677679717540741, "signal/frontier_coverage_0/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01150359958410263, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004422146081924439, "signal/frontier_coverage_1/centered_abs_mean": 0.30924100875854493, "signal/frontier_coverage_1/group_std_mean": 0.3677679717540741, "signal/frontier_coverage_1/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01150359958410263, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004422146081924439, "signal/frontier_coverage_10/centered_abs_mean": 0.30924100875854493, "signal/frontier_coverage_10/group_std_mean": 0.3677679717540741, "signal/frontier_coverage_10/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01150359958410263, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004422146081924439, "signal/frontier_coverage_15/centered_abs_mean": 0.30924100875854493, "signal/frontier_coverage_15/group_std_mean": 0.3677679717540741, "signal/frontier_coverage_15/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01150359958410263, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004422146081924439, "signal/frontier_coverage_20/centered_abs_mean": 0.30924100875854493, "signal/frontier_coverage_20/group_std_mean": 0.3677679717540741, "signal/frontier_coverage_20/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01150359958410263, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004422146081924439, "signal/frontier_coverage_25/centered_abs_mean": 0.30924100875854493, "signal/frontier_coverage_25/group_std_mean": 0.3677679717540741, "signal/frontier_coverage_25/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01150359958410263, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004422146081924439, "signal/frontier_coverage_5/centered_abs_mean": 0.30924100875854493, "signal/frontier_coverage_5/group_std_mean": 0.3677679717540741, "signal/frontier_coverage_5/group_zero_std_frac": 0.002777777798473835, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01150359958410263, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004422146081924439, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.43938422203063965, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4763322174549103, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11442900747060776, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.043938422203063966, "step": 10 }, { "calibration/aurc": 0.5024067621409587, "calibration/batch_distribution_entropy": 0.24801621219288422, "calibration/confidence_entropy": 0.2106205381039305, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4819867701410224, "calibration/mean_confidence": 0.9234321037382752, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019791666666666673, "completions/max_length": 4029.0, "completions/max_terminated_length": 4029.0, "completions/mean_length": 504.4060791015625, "completions/mean_terminated_length": 514.6802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 4.0, "epoch": 0.03599955000562493, "grad_norm": 0.007030100096017122, "learning_rate": 3.6057692307692306e-07, "loss": 0.0069, "num_tokens": 26888458.0, "reward": 0.46561177968978884, "reward_std": 0.3956846117973328, "rewards/accuracy_reward": 0.23828125, "rewards/brier_reward": 0.299567723274231, "rewards/confidence_uniqueness_reward": 0.3241762280464172, "rewards/format_reward": 0.6395833253860473, "rewards/frontier_coverage_0": 0.25464145839214325, "rewards/frontier_coverage_1": 0.25464145839214325, "rewards/frontier_coverage_10": 0.25464145839214325, "rewards/frontier_coverage_15": 0.25464145839214325, "rewards/frontier_coverage_20": 0.25464145839214325, "rewards/frontier_coverage_25": 0.25464145839214325, "rewards/frontier_coverage_5": 0.25464145839214325, "rewards/frontier_entropy_batch_reward": -0.6118452191352844, "signal/accuracy_reward/centered_abs_mean": 0.2900553375482559, "signal/accuracy_reward/group_std_mean": 0.35240110754966736, "signal/accuracy_reward/group_zero_std_frac": 0.10833333507180214, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.3923906862735748, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14502766877412795, "signal/advantage_abs_mean": 0.8354665875434876, "signal/advantage_pre_scale_abs_mean": 0.33352160453796387, "signal/advantage_pre_scale_std": 0.4006196856498718, "signal/advantage_std": 0.9841970324516296, "signal/brier_reward/centered_abs_mean": 0.30055932998657225, "signal/brier_reward/group_std_mean": 0.3555553019046783, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08140757381916046, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030055934190750123, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.2306578904390335, "signal/confidence_uniqueness_reward/group_std_mean": 0.2857406497001648, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0626334622502327, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.02306578904390335, "signal/format_reward/centered_abs_mean": 0.4120876729488373, "signal/format_reward/group_std_mean": 0.45783867239952086, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5594466686248779, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.20604383647441865, "signal/frontier_coverage_0/centered_abs_mean": 0.29105416536331175, "signal/frontier_coverage_0/group_std_mean": 0.35061998963356017, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0112642303109169, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0041620745323598385, "signal/frontier_coverage_1/centered_abs_mean": 0.29105416536331175, "signal/frontier_coverage_1/group_std_mean": 0.35061998963356017, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0112642303109169, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041620745323598385, "signal/frontier_coverage_10/centered_abs_mean": 0.29105416536331175, "signal/frontier_coverage_10/group_std_mean": 0.35061998963356017, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0112642303109169, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0041620745323598385, "signal/frontier_coverage_15/centered_abs_mean": 0.29105416536331175, "signal/frontier_coverage_15/group_std_mean": 0.35061998963356017, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0112642303109169, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0041620745323598385, "signal/frontier_coverage_20/centered_abs_mean": 0.29105416536331175, "signal/frontier_coverage_20/group_std_mean": 0.35061998963356017, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0112642303109169, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0041620745323598385, "signal/frontier_coverage_25/centered_abs_mean": 0.29105416536331175, "signal/frontier_coverage_25/group_std_mean": 0.35061998963356017, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0112642303109169, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0041620745323598385, "signal/frontier_coverage_5/centered_abs_mean": 0.29105416536331175, "signal/frontier_coverage_5/group_std_mean": 0.35061998963356017, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0112642303109169, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0041620745323598385, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.42714359760284426, "signal/frontier_entropy_batch_reward/group_std_mean": 0.46969146728515626, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11596711426973343, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.042714360356330874, "step": 15 }, { "calibration/aurc": 0.5694586542786206, "calibration/batch_distribution_entropy": 0.2448034104520702, "calibration/confidence_entropy": 0.22888620321297623, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5197183535816638, "calibration/mean_confidence": 0.9203614575376587, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01918402777777779, "completions/max_length": 3979.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 462.74097900390626, "completions/mean_terminated_length": 471.86796875, "completions/min_length": 0.0, "completions/min_terminated_length": 2.2, "epoch": 0.04799940000749991, "grad_norm": 0.02198331244289875, "learning_rate": 4.807692307692308e-07, "loss": 0.0011, "num_tokens": 35332930.0, "reward": 0.5601749300956727, "reward_std": 0.3667020261287689, "rewards/accuracy_reward": 0.2842013895511627, "rewards/brier_reward": 0.3595377504825592, "rewards/confidence_uniqueness_reward": 0.3906712234020233, "rewards/format_reward": 0.7723958373069764, "rewards/frontier_coverage_0": 0.3041978418827057, "rewards/frontier_coverage_1": 0.3041978418827057, "rewards/frontier_coverage_10": 0.3041978418827057, "rewards/frontier_coverage_15": 0.3041978418827057, "rewards/frontier_coverage_20": 0.3041978418827057, "rewards/frontier_coverage_25": 0.3041978418827057, "rewards/frontier_coverage_5": 0.3041978418827057, "rewards/frontier_entropy_batch_reward": -0.7359476447105407, "signal/accuracy_reward/centered_abs_mean": 0.30268012285232543, "signal/accuracy_reward/group_std_mean": 0.3640410006046295, "signal/accuracy_reward/group_zero_std_frac": 0.09444444626569748, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.4493618309497833, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15134006142616271, "signal/advantage_abs_mean": 0.7916141271591186, "signal/advantage_pre_scale_abs_mean": 0.29761979579925535, "signal/advantage_pre_scale_std": 0.3721330463886261, "signal/advantage_std": 0.984171736240387, "signal/brier_reward/centered_abs_mean": 0.3021188259124756, "signal/brier_reward/group_std_mean": 0.3569773733615875, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08976634591817856, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030211882293224336, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.21197229325771333, "signal/confidence_uniqueness_reward/group_std_mean": 0.2709016382694244, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0631466455757618, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.021197229623794556, "signal/format_reward/centered_abs_mean": 0.310796445608139, "signal/format_reward/group_std_mean": 0.39093394875526427, "signal/format_reward/group_zero_std_frac": 0.002777777798473835, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.4619352400302887, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1553982228040695, "signal/frontier_coverage_0/centered_abs_mean": 0.3003900110721588, "signal/frontier_coverage_0/group_std_mean": 0.3597902595996857, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.012757665291428567, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004295577295124531, "signal/frontier_coverage_1/centered_abs_mean": 0.3003900110721588, "signal/frontier_coverage_1/group_std_mean": 0.3597902595996857, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.012757665291428567, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004295577295124531, "signal/frontier_coverage_10/centered_abs_mean": 0.3003900110721588, "signal/frontier_coverage_10/group_std_mean": 0.3597902595996857, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012757665291428567, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004295577295124531, "signal/frontier_coverage_15/centered_abs_mean": 0.3003900110721588, "signal/frontier_coverage_15/group_std_mean": 0.3597902595996857, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012757665291428567, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004295577295124531, "signal/frontier_coverage_20/centered_abs_mean": 0.3003900110721588, "signal/frontier_coverage_20/group_std_mean": 0.3597902595996857, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012757665291428567, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004295577295124531, "signal/frontier_coverage_25/centered_abs_mean": 0.3003900110721588, "signal/frontier_coverage_25/group_std_mean": 0.3597902595996857, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012757665291428567, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004295577295124531, "signal/frontier_coverage_5/centered_abs_mean": 0.3003900110721588, "signal/frontier_coverage_5/group_std_mean": 0.3597902595996857, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.012757665291428567, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004295577295124531, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3481315076351166, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42123820185661315, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.002777777798473835, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.10343978106975556, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0348131500184536, "step": 20 }, { "calibration/aurc": 0.5489235090143648, "calibration/batch_distribution_entropy": 0.2749327391476236, "calibration/buffer_distribution_entropy": 0.2693125108855333, "calibration/confidence_entropy": 0.2360269107840506, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5054070723684616, "calibration/mean_confidence": 0.91632770676523, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0109375, "completions/max_length": 3966.0, "completions/max_terminated_length": 3966.0, "completions/mean_length": 412.9024353027344, "completions/mean_terminated_length": 417.5066162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 33.2, "epoch": 0.05999925000937488, "grad_norm": 0.010560334660112858, "learning_rate": 6.009615384615385e-07, "loss": -0.0219, "num_tokens": 43214014.0, "reward": 0.621948528289795, "reward_std": 0.2870722770690918, "rewards/accuracy_reward": 0.3277777791023254, "rewards/brier_reward": 0.4194044291973114, "rewards/confidence_uniqueness_reward": 0.47655481696128843, "rewards/format_reward": 0.907031238079071, "rewards/frontier_coverage_0": 0.015070206206291913, "rewards/frontier_coverage_1": 0.015070206206291913, "rewards/frontier_coverage_10": 0.015070206206291913, "rewards/frontier_coverage_15": 0.015070206206291913, "rewards/frontier_coverage_20": 0.015070206206291913, "rewards/frontier_coverage_25": 0.015070206206291913, "rewards/frontier_coverage_5": 0.015070206206291913, "rewards/frontier_entropy_batch_reward": -0.8656046628952027, "signal/accuracy_reward/centered_abs_mean": 0.32026910185813906, "signal/accuracy_reward/group_std_mean": 0.3814137876033783, "signal/accuracy_reward/group_zero_std_frac": 0.06666666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.6105441927909852, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16013455092906953, "signal/advantage_abs_mean": 0.7641073822975158, "signal/advantage_pre_scale_abs_mean": 0.22727133631706237, "signal/advantage_pre_scale_std": 0.29072420597076415, "signal/advantage_std": 0.9840883612632751, "signal/brier_reward/centered_abs_mean": 0.3018735468387604, "signal/brier_reward/group_std_mean": 0.3560627937316895, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.11514985263347625, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.030187354236841202, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.184186252951622, "signal/confidence_uniqueness_reward/group_std_mean": 0.2384261429309845, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0701954871416092, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.018418625369668006, "signal/format_reward/centered_abs_mean": 0.152099609375, "signal/format_reward/group_std_mean": 0.24653084874153136, "signal/format_reward/group_zero_std_frac": 0.1444444440305233, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2882718056440353, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0760498046875, "signal/frontier_coverage_0/centered_abs_mean": 0.025607530772686005, "signal/frontier_coverage_0/group_std_mean": 0.04395473003387451, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0013895100564695895, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0003661876980913803, "signal/frontier_coverage_1/centered_abs_mean": 0.025607530772686005, "signal/frontier_coverage_1/group_std_mean": 0.04395473003387451, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0013895100564695895, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0003661876980913803, "signal/frontier_coverage_10/centered_abs_mean": 0.025607530772686005, "signal/frontier_coverage_10/group_std_mean": 0.04395473003387451, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0013895100564695895, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0003661876980913803, "signal/frontier_coverage_15/centered_abs_mean": 0.025607530772686005, "signal/frontier_coverage_15/group_std_mean": 0.04395473003387451, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0013895100564695895, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0003661876980913803, "signal/frontier_coverage_20/centered_abs_mean": 0.025607530772686005, "signal/frontier_coverage_20/group_std_mean": 0.04395473003387451, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0013895100564695895, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0003661876980913803, "signal/frontier_coverage_25/centered_abs_mean": 0.025607530772686005, "signal/frontier_coverage_25/group_std_mean": 0.04395473003387451, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0013895100564695895, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0003661876980913803, "signal/frontier_coverage_5/centered_abs_mean": 0.025607530772686005, "signal/frontier_coverage_5/group_std_mean": 0.04395473003387451, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0013895100564695895, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0003661876980913803, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.21398890316486358, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3219181656837463, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.03611111231148243, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.08149619698524475, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.021398890390992166, "step": 25 }, { "calibration/aurc": 0.5382794629498553, "calibration/batch_distribution_entropy": 0.309108020546252, "calibration/buffer_distribution_entropy": 0.27122874768365013, "calibration/confidence_entropy": 0.25586708450642537, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4759124424924992, "calibration/mean_confidence": 0.9054469712207339, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 3831.0, "completions/max_terminated_length": 3831.0, "completions/mean_length": 410.4355041503906, "completions/mean_terminated_length": 414.7814697265625, "completions/min_length": 0.0, "completions/min_terminated_length": 60.4, "epoch": 0.07199910001124986, "grad_norm": 0.01928151771426201, "learning_rate": 7.211538461538461e-07, "loss": -0.0436, "num_tokens": 51052151.0, "reward": 0.6726618885993958, "reward_std": 0.25515561997890474, "rewards/accuracy_reward": 0.36223958134651185, "rewards/brier_reward": 0.4675338685512543, "rewards/confidence_uniqueness_reward": 0.5357791066169739, "rewards/format_reward": 0.9634548544883728, "rewards/frontier_coverage_0": 0.009524069260805845, "rewards/frontier_coverage_1": 0.009524069260805845, "rewards/frontier_coverage_10": 0.009524069260805845, "rewards/frontier_coverage_15": 0.009524069260805845, "rewards/frontier_coverage_20": 0.009524069260805845, "rewards/frontier_coverage_25": 0.009524069260805845, "rewards/frontier_coverage_5": 0.009524069260805845, "rewards/frontier_entropy_batch_reward": -0.9146998524665833, "signal/accuracy_reward/centered_abs_mean": 0.32538520097732543, "signal/accuracy_reward/group_std_mean": 0.3815238237380981, "signal/accuracy_reward/group_zero_std_frac": 0.08611111417412758, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7917419075965881, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.16269260048866271, "signal/advantage_abs_mean": 0.7773361206054688, "signal/advantage_pre_scale_abs_mean": 0.20761180222034453, "signal/advantage_pre_scale_std": 0.26097519099712374, "signal/advantage_std": 0.9839826345443725, "signal/brier_reward/centered_abs_mean": 0.29560062289237976, "signal/brier_reward/group_std_mean": 0.34639087319374084, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14382269978523254, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.029560060799121858, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.16958691775798798, "signal/confidence_uniqueness_reward/group_std_mean": 0.2160712420940399, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08290428072214126, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01695869229733944, "signal/format_reward/centered_abs_mean": 0.06507703959941864, "signal/format_reward/group_std_mean": 0.12960017919540406, "signal/format_reward/group_zero_std_frac": 0.45000000596046447, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.15022960901260377, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.03253851979970932, "signal/frontier_coverage_0/centered_abs_mean": 0.021855851635336876, "signal/frontier_coverage_0/group_std_mean": 0.040756043046712875, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0015435649547725916, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0003125386836472899, "signal/frontier_coverage_1/centered_abs_mean": 0.021855851635336876, "signal/frontier_coverage_1/group_std_mean": 0.040756043046712875, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0015435649547725916, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0003125386836472899, "signal/frontier_coverage_10/centered_abs_mean": 0.021855851635336876, "signal/frontier_coverage_10/group_std_mean": 0.040756043046712875, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0015435649547725916, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0003125386836472899, "signal/frontier_coverage_15/centered_abs_mean": 0.021855851635336876, "signal/frontier_coverage_15/group_std_mean": 0.040756043046712875, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0015435649547725916, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0003125386836472899, "signal/frontier_coverage_20/centered_abs_mean": 0.021855851635336876, "signal/frontier_coverage_20/group_std_mean": 0.040756043046712875, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0015435649547725916, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0003125386836472899, "signal/frontier_coverage_25/centered_abs_mean": 0.021855851635336876, "signal/frontier_coverage_25/group_std_mean": 0.040756043046712875, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0015435649547725916, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0003125386836472899, "signal/frontier_coverage_5/centered_abs_mean": 0.021855851635336876, "signal/frontier_coverage_5/group_std_mean": 0.040756043046712875, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0015435649547725916, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0003125386836472899, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.146951425075531, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25599651634693144, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.1250000014901161, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.07040144726634026, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.014695142395794392, "step": 30 }, { "calibration/aurc": 0.47850826375168937, "calibration/batch_distribution_entropy": 0.3793815586454916, "calibration/buffer_distribution_entropy": 0.2861277383673485, "calibration/confidence_entropy": 0.29658167577248173, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4002400196916637, "calibration/mean_confidence": 0.8884149740450364, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010416666666666675, "completions/max_length": 3734.8, "completions/max_terminated_length": 3734.8, "completions/mean_length": 450.95677490234374, "completions/mean_terminated_length": 455.6978393554688, "completions/min_length": 0.0, "completions/min_terminated_length": 89.6, "epoch": 0.08399895001312484, "grad_norm": 0.004714103415608406, "learning_rate": 8.41346153846154e-07, "loss": -0.0272, "num_tokens": 59324613.0, "reward": 0.7333313584327698, "reward_std": 0.23513826131820678, "rewards/accuracy_reward": 0.4403645873069763, "rewards/brier_reward": 0.5525379419326782, "rewards/confidence_uniqueness_reward": 0.590362799167633, "rewards/format_reward": 0.983506953716278, "rewards/frontier_coverage_0": 0.008734829723834991, "rewards/frontier_coverage_1": 0.008734829723834991, "rewards/frontier_coverage_10": 0.008734829723834991, "rewards/frontier_coverage_15": 0.008734829723834991, "rewards/frontier_coverage_20": 0.008734829723834991, "rewards/frontier_coverage_25": 0.008734829723834991, "rewards/frontier_coverage_5": 0.008734829723834991, "rewards/frontier_entropy_batch_reward": -0.9376886129379273, "signal/accuracy_reward/centered_abs_mean": 0.31514214277267455, "signal/accuracy_reward/group_std_mean": 0.3757008254528046, "signal/accuracy_reward/group_zero_std_frac": 0.07777777872979641, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8547364711761475, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15757107138633727, "signal/advantage_abs_mean": 0.777402913570404, "signal/advantage_pre_scale_abs_mean": 0.19204484224319457, "signal/advantage_pre_scale_std": 0.24264540672302246, "signal/advantage_std": 0.9839303016662597, "signal/brier_reward/centered_abs_mean": 0.2717652380466461, "signal/brier_reward/group_std_mean": 0.3256272315979004, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14762357771396636, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.027176523208618165, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17783839106559754, "signal/confidence_uniqueness_reward/group_std_mean": 0.2096118301153183, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0962824359536171, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.017783838883042337, "signal/format_reward/centered_abs_mean": 0.0297960065305233, "signal/format_reward/group_std_mean": 0.06317889839410781, "signal/format_reward/group_zero_std_frac": 0.7166666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08153303265571595, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01489800326526165, "signal/frontier_coverage_0/centered_abs_mean": 0.02503722868859768, "signal/frontier_coverage_0/group_std_mean": 0.04525566101074219, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0019457651767879724, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00035803236532956363, "signal/frontier_coverage_1/centered_abs_mean": 0.02503722868859768, "signal/frontier_coverage_1/group_std_mean": 0.04525566101074219, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0019457651767879724, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00035803236532956363, "signal/frontier_coverage_10/centered_abs_mean": 0.02503722868859768, "signal/frontier_coverage_10/group_std_mean": 0.04525566101074219, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0019457651767879724, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00035803236532956363, "signal/frontier_coverage_15/centered_abs_mean": 0.02503722868859768, "signal/frontier_coverage_15/group_std_mean": 0.04525566101074219, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0019457651767879724, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00035803236532956363, "signal/frontier_coverage_20/centered_abs_mean": 0.02503722868859768, "signal/frontier_coverage_20/group_std_mean": 0.04525566101074219, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0019457651767879724, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00035803236532956363, "signal/frontier_coverage_25/centered_abs_mean": 0.02503722868859768, "signal/frontier_coverage_25/group_std_mean": 0.04525566101074219, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0019457651767879724, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00035803236532956363, "signal/frontier_coverage_5/centered_abs_mean": 0.02503722868859768, "signal/frontier_coverage_5/group_std_mean": 0.04525566101074219, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0019457651767879724, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00035803236532956363, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11057607978582382, "signal/frontier_entropy_batch_reward/group_std_mean": 0.21464248597621918, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.23055555820465087, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.060289456695318225, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011057608388364315, "step": 35 }, { "calibration/aurc": 0.4402947585970144, "calibration/batch_distribution_entropy": 0.4824331907575866, "calibration/buffer_distribution_entropy": 0.31577170793548615, "calibration/confidence_entropy": 0.34483839347598094, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.010026385224274407, "calibration/coverage@20%": 0.013192612137203167, "calibration/coverage@25%": 0.01424802110817942, "calibration/coverage@30%": 0.01424802110817942, "calibration/coverage@5%": 0.0, "calibration/ece": 0.33865251464556356, "calibration/mean_confidence": 0.8666601711603379, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009114583333333348, "completions/max_length": 3966.0, "completions/max_terminated_length": 3966.0, "completions/mean_length": 480.8202331542969, "completions/mean_terminated_length": 485.25437622070314, "completions/min_length": 0.0, "completions/min_terminated_length": 87.0, "epoch": 0.09599880001499982, "grad_norm": 0.0031753634102642536, "learning_rate": 9.615384615384617e-07, "loss": -0.0197, "num_tokens": 67983182.0, "reward": 0.7822724223136902, "reward_std": 0.21780899465084075, "rewards/accuracy_reward": 0.5059895753860474, "rewards/brier_reward": 0.6228862762451172, "rewards/confidence_uniqueness_reward": 0.6643950819969178, "rewards/format_reward": 0.9869791507720947, "rewards/frontier_coverage_0": 0.007964784186333418, "rewards/frontier_coverage_1": 0.007964784186333418, "rewards/frontier_coverage_10": 0.007964784186333418, "rewards/frontier_coverage_15": 0.007964784186333418, "rewards/frontier_coverage_20": 0.007964784186333418, "rewards/frontier_coverage_25": 0.007964784186333418, "rewards/frontier_coverage_5": 0.007964784186333418, "rewards/frontier_entropy_batch_reward": -0.9373735666275025, "signal/accuracy_reward/centered_abs_mean": 0.28925238847732543, "signal/accuracy_reward/group_std_mean": 0.3537147223949432, "signal/accuracy_reward/group_zero_std_frac": 0.10000000167638064, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0984381198883058, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14462619423866271, "signal/advantage_abs_mean": 0.7445023059844971, "signal/advantage_pre_scale_abs_mean": 0.1732119858264923, "signal/advantage_pre_scale_std": 0.22844835221767426, "signal/advantage_std": 0.9837140202522278, "signal/brier_reward/centered_abs_mean": 0.2372313529253006, "signal/brier_reward/group_std_mean": 0.2905817449092865, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18049295842647553, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02372313551604748, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.14429336190223693, "signal/confidence_uniqueness_reward/group_std_mean": 0.1727246791124344, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.11001295447349549, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.014429337345063686, "signal/format_reward/centered_abs_mean": 0.02393663190305233, "signal/format_reward/group_std_mean": 0.05173143371939659, "signal/format_reward/group_zero_std_frac": 0.7666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09135463684797288, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011968315951526165, "signal/frontier_coverage_0/centered_abs_mean": 0.032622770965099336, "signal/frontier_coverage_0/group_std_mean": 0.05645905360579491, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0035575965885072947, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00046650563017465175, "signal/frontier_coverage_1/centered_abs_mean": 0.032622770965099336, "signal/frontier_coverage_1/group_std_mean": 0.05645905360579491, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0035575965885072947, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00046650563017465175, "signal/frontier_coverage_10/centered_abs_mean": 0.032622770965099336, "signal/frontier_coverage_10/group_std_mean": 0.05645905360579491, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0035575965885072947, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00046650563017465175, "signal/frontier_coverage_15/centered_abs_mean": 0.032622770965099336, "signal/frontier_coverage_15/group_std_mean": 0.05645905360579491, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0035575965885072947, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00046650563017465175, "signal/frontier_coverage_20/centered_abs_mean": 0.032622770965099336, "signal/frontier_coverage_20/group_std_mean": 0.05645905360579491, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0035575965885072947, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00046650563017465175, "signal/frontier_coverage_25/centered_abs_mean": 0.032622770965099336, "signal/frontier_coverage_25/group_std_mean": 0.05645905360579491, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0035575965885072947, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00046650563017465175, "signal/frontier_coverage_5/centered_abs_mean": 0.032622770965099336, "signal/frontier_coverage_5/group_std_mean": 0.05645905360579491, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0035575965885072947, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00046650563017465175, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.11271507740020752, "signal/frontier_entropy_batch_reward/group_std_mean": 0.22643994092941283, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.20555555522441865, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0858034148812294, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.011271507106721401, "step": 40 }, { "calibration/aurc": 0.3377478184440539, "calibration/batch_distribution_entropy": 0.5848481948438579, "calibration/buffer_distribution_entropy": 0.35423291152038805, "calibration/confidence_entropy": 0.384831962509367, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.013829787234042554, "calibration/coverage@25%": 0.18736678360238054, "calibration/coverage@30%": 0.4657049563747111, "calibration/coverage@5%": 0.0, "calibration/ece": 0.22727172809322985, "calibration/mean_confidence": 0.8391492923935034, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009375, "completions/max_length": 4016.0, "completions/max_terminated_length": 4016.0, "completions/mean_length": 543.1631042480469, "completions/mean_terminated_length": 548.3945129394531, "completions/min_length": 0.0, "completions/min_terminated_length": 111.6, "epoch": 0.1079986500168748, "grad_norm": 0.01239377073943615, "learning_rate": 1.0817307692307693e-06, "loss": -0.0188, "num_tokens": 77375685.0, "reward": 0.813059675693512, "reward_std": 0.2060342937707901, "rewards/accuracy_reward": 0.5426215350627899, "rewards/brier_reward": 0.6699079275131226, "rewards/confidence_uniqueness_reward": 0.7288915514945984, "rewards/format_reward": 0.9876736164093017, "rewards/frontier_coverage_0": 0.0066648813604842875, "rewards/frontier_coverage_1": 0.0066648813604842875, "rewards/frontier_coverage_10": 0.0066648813604842875, "rewards/frontier_coverage_15": 0.0066648813604842875, "rewards/frontier_coverage_20": 0.0066648813604842875, "rewards/frontier_coverage_25": 0.0066648813604842875, "rewards/frontier_coverage_5": 0.0066648813604842875, "rewards/frontier_entropy_batch_reward": -0.9263499140739441, "signal/accuracy_reward/centered_abs_mean": 0.2699490010738373, "signal/accuracy_reward/group_std_mean": 0.3382055342197418, "signal/accuracy_reward/group_zero_std_frac": 0.11111111342906951, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.247203779220581, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.13497450053691865, "signal/advantage_abs_mean": 0.7202765941619873, "signal/advantage_pre_scale_abs_mean": 0.15997426211833954, "signal/advantage_pre_scale_std": 0.21895503699779512, "signal/advantage_std": 0.9835591673851013, "signal/brier_reward/centered_abs_mean": 0.20789836943149567, "signal/brier_reward/group_std_mean": 0.26231162548065184, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19199275672435762, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020789837837219237, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10277863144874573, "signal/confidence_uniqueness_reward/group_std_mean": 0.13384136855602263, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0936692550778389, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010277863219380378, "signal/format_reward/centered_abs_mean": 0.02222222238779068, "signal/format_reward/group_std_mean": 0.046404258161783216, "signal/format_reward/group_zero_std_frac": 0.794444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10093065053224563, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01111111119389534, "signal/frontier_coverage_0/centered_abs_mean": 0.04338513538241386, "signal/frontier_coverage_0/group_std_mean": 0.06866179034113884, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00571465864777565, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0006204074248671532, "signal/frontier_coverage_1/centered_abs_mean": 0.04338513538241386, "signal/frontier_coverage_1/group_std_mean": 0.06866179034113884, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00571465864777565, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0006204074248671532, "signal/frontier_coverage_10/centered_abs_mean": 0.04338513538241386, "signal/frontier_coverage_10/group_std_mean": 0.06866179034113884, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00571465864777565, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0006204074248671532, "signal/frontier_coverage_15/centered_abs_mean": 0.04338513538241386, "signal/frontier_coverage_15/group_std_mean": 0.06866179034113884, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00571465864777565, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0006204074248671532, "signal/frontier_coverage_20/centered_abs_mean": 0.04338513538241386, "signal/frontier_coverage_20/group_std_mean": 0.06866179034113884, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00571465864777565, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0006204074248671532, "signal/frontier_coverage_25/centered_abs_mean": 0.04338513538241386, "signal/frontier_coverage_25/group_std_mean": 0.06866179034113884, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00571465864777565, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0006204074248671532, "signal/frontier_coverage_5/centered_abs_mean": 0.04338513538241386, "signal/frontier_coverage_5/group_std_mean": 0.06866179034113884, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00571465864777565, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0006204074248671532, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.1316349670290947, "signal/frontier_entropy_batch_reward/group_std_mean": 0.25327491760253906, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.16388889029622078, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12047145068645478, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.013163497298955917, "step": 45 }, { "calibration/aurc": 0.44876515017592206, "calibration/batch_distribution_entropy": 0.6788178143136975, "calibration/buffer_distribution_entropy": 0.40940430199471384, "calibration/confidence_entropy": 0.44995376994049635, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.035263157894736843, "calibration/coverage@25%": 0.05526315789473685, "calibration/coverage@30%": 0.06649310441880102, "calibration/coverage@5%": 0.0, "calibration/ece": 0.286977385931817, "calibration/mean_confidence": 0.7954010856289189, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01493055555555558, "completions/max_length": 3769.0, "completions/max_terminated_length": 3769.0, "completions/mean_length": 586.178466796875, "completions/mean_terminated_length": 595.0872192382812, "completions/min_length": 0.0, "completions/min_terminated_length": 129.4, "epoch": 0.11999850001874976, "grad_norm": 0.0035637153778225183, "learning_rate": 1.201923076923077e-06, "loss": -0.029, "num_tokens": 87226061.0, "reward": 0.838405168056488, "reward_std": 0.19529550671577453, "rewards/accuracy_reward": 0.578906261920929, "rewards/brier_reward": 0.7015391111373901, "rewards/confidence_uniqueness_reward": 0.7591113924980164, "rewards/format_reward": 0.9816840291023254, "rewards/frontier_coverage_0": -0.0034364996245130897, "rewards/frontier_coverage_1": -0.0034364996245130897, "rewards/frontier_coverage_10": -0.0034364996245130897, "rewards/frontier_coverage_15": -0.0034364996245130897, "rewards/frontier_coverage_20": -0.0034364996245130897, "rewards/frontier_coverage_25": -0.0034364996245130897, "rewards/frontier_coverage_5": -0.0034364996245130897, "rewards/frontier_entropy_batch_reward": -0.8761105418205262, "signal/accuracy_reward/centered_abs_mean": 0.23926323652267456, "signal/accuracy_reward/group_std_mean": 0.3040441036224365, "signal/accuracy_reward/group_zero_std_frac": 0.17777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0301209807395935, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11963161826133728, "signal/advantage_abs_mean": 0.7018255352973938, "signal/advantage_pre_scale_abs_mean": 0.14790762662887574, "signal/advantage_pre_scale_std": 0.2124355673789978, "signal/advantage_std": 0.98361257314682, "signal/brier_reward/centered_abs_mean": 0.1796101748943329, "signal/brier_reward/group_std_mean": 0.22917198836803437, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15517139434814453, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01796101815998554, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.11911777704954148, "signal/confidence_uniqueness_reward/group_std_mean": 0.14986335337162018, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.1029736876487732, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.011911777965724468, "signal/format_reward/centered_abs_mean": 0.03136393222957849, "signal/format_reward/group_std_mean": 0.06070434525609016, "signal/format_reward/group_zero_std_frac": 0.7444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.13335922509431838, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.015681966114789246, "signal/frontier_coverage_0/centered_abs_mean": 0.05662049055099487, "signal/frontier_coverage_0/group_std_mean": 0.0813647210597992, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0069804366677999495, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0008096729870885611, "signal/frontier_coverage_1/centered_abs_mean": 0.05662049055099487, "signal/frontier_coverage_1/group_std_mean": 0.0813647210597992, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0069804366677999495, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0008096729870885611, "signal/frontier_coverage_10/centered_abs_mean": 0.05662049055099487, "signal/frontier_coverage_10/group_std_mean": 0.0813647210597992, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0069804366677999495, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008096729870885611, "signal/frontier_coverage_15/centered_abs_mean": 0.05662049055099487, "signal/frontier_coverage_15/group_std_mean": 0.0813647210597992, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0069804366677999495, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008096729870885611, "signal/frontier_coverage_20/centered_abs_mean": 0.05662049055099487, "signal/frontier_coverage_20/group_std_mean": 0.0813647210597992, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0069804366677999495, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008096729870885611, "signal/frontier_coverage_25/centered_abs_mean": 0.05662049055099487, "signal/frontier_coverage_25/group_std_mean": 0.0813647210597992, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0069804366677999495, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0008096729870885611, "signal/frontier_coverage_5/centered_abs_mean": 0.05662049055099487, "signal/frontier_coverage_5/group_std_mean": 0.0813647210597992, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0069804366677999495, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0008096729870885611, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.20503715574741363, "signal/frontier_entropy_batch_reward/group_std_mean": 0.33353949189186094, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.08333333432674409, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.17352269291877748, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.020503715239465238, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.2739726821231387, "eval_calibration/batch_distribution_entropy": 0.6931429942513239, "eval_calibration/buffer_distribution_entropy": 0.44627538022254415, "eval_calibration/confidence_entropy": 0.49130108893650654, "eval_calibration/coverage@0%": 0.0871415770609319, "eval_calibration/coverage@1%": 0.0871415770609319, "eval_calibration/coverage@10%": 0.0871415770609319, "eval_calibration/coverage@15%": 0.22946908602150537, "eval_calibration/coverage@20%": 0.4065524193548387, "eval_calibration/coverage@25%": 0.5317204301075269, "eval_calibration/coverage@30%": 0.6578853046594982, "eval_calibration/coverage@5%": 0.0871415770609319, "eval_calibration/ece": 0.18241153673835128, "eval_calibration/mean_confidence": 0.7556062051971327, "eval_completions/clipped_ratio": 0.013020833333333351, "eval_completions/max_length": 3520.0, "eval_completions/max_terminated_length": 3520.0, "eval_completions/mean_length": 614.679189046224, "eval_completions/mean_terminated_length": 622.7157389322916, "eval_completions/min_length": 46.666666666666664, "eval_completions/min_terminated_length": 193.33333333333334, "eval_loss": 0.0, "eval_num_tokens": 87226061.0, "eval_reward": 0.8327561815579733, "eval_reward_std": 0.282521386941274, "eval_rewards/accuracy_reward": 0.5876736144224802, "eval_rewards/brier_reward": 0.7180667718251547, "eval_rewards/confidence_uniqueness_reward": 0.7620020310084025, "eval_rewards/format_reward": 0.9791666666666666, "eval_rewards/frontier_coverage_0": -0.0075345072740068035, "eval_rewards/frontier_coverage_1": -0.0075345072740068035, "eval_rewards/frontier_coverage_10": -0.0075345072740068035, "eval_rewards/frontier_coverage_15": -0.0075345072740068035, "eval_rewards/frontier_coverage_20": -0.0075345072740068035, "eval_rewards/frontier_coverage_25": -0.0075345072740068035, "eval_rewards/frontier_coverage_5": -0.0075345072740068035, "eval_rewards/frontier_entropy_batch_reward": -0.9791666666666666, "eval_runtime": 202.8998, "eval_samples_per_second": 4.929, "eval_signal/accuracy_reward/centered_abs_mean": 0.4644639740387599, "eval_signal/accuracy_reward/group_std_mean": 0.4887815515200297, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.830911248922348, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.23223198701937994, "eval_signal/advantage_abs_mean": 0.9150658249855042, "eval_signal/advantage_pre_scale_abs_mean": 0.25886716693639755, "eval_signal/advantage_pre_scale_std": 0.28026849031448364, "eval_signal/advantage_std": 0.986470510562261, "eval_signal/brier_reward/centered_abs_mean": 0.24225710580746332, "eval_signal/brier_reward/group_std_mean": 0.2897177239259084, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08656309793392818, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.024225711201628048, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.1251125161846479, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1669869671265284, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.044979797676205635, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012511251804729303, "eval_signal/format_reward/centered_abs_mean": 0.039713542287548385, "eval_signal/format_reward/group_std_mean": 0.09991467806200187, "eval_signal/format_reward/group_zero_std_frac": 0.5000000099341074, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.07029524445533752, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.019856771143774193, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.08148297543327014, "eval_signal/frontier_coverage_0/group_std_mean": 0.1163974292576313, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.004172447098729511, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011652065246986847, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.08148297543327014, "eval_signal/frontier_coverage_1/group_std_mean": 0.1163974292576313, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.004172447098729511, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011652065246986847, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.08148297543327014, "eval_signal/frontier_coverage_10/group_std_mean": 0.1163974292576313, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004172447098729511, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011652065246986847, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.08148297543327014, "eval_signal/frontier_coverage_15/group_std_mean": 0.1163974292576313, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.004172447098729511, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011652065246986847, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.08148297543327014, "eval_signal/frontier_coverage_20/group_std_mean": 0.1163974292576313, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.004172447098729511, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011652065246986847, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.08148297543327014, "eval_signal/frontier_coverage_25/group_std_mean": 0.1163974292576313, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.004172447098729511, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011652065246986847, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.08148297543327014, "eval_signal/frontier_coverage_5/group_std_mean": 0.1163974292576313, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.004172447098729511, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011652065246986847, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.039713542287548385, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.09991467806200187, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5000000099341074, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.014059049698213736, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.003971354415019353, "eval_steps_per_second": 0.03, "step": 50 }, { "epoch": 0.11999850001874976, "step": 50, "train_probe_calibration/aurc": 0.36517490575956213, "train_probe_calibration/batch_distribution_entropy": 0.6947527552817748, "train_probe_calibration/buffer_distribution_entropy": 0.4524707357411102, "train_probe_calibration/confidence_entropy": 0.4870250294794456, "train_probe_calibration/coverage@0%": 0.03245967741935484, "train_probe_calibration/coverage@1%": 0.03245967741935484, "train_probe_calibration/coverage@10%": 0.03245967741935484, "train_probe_calibration/coverage@15%": 0.03245967741935484, "train_probe_calibration/coverage@20%": 0.05396505376344086, "train_probe_calibration/coverage@25%": 0.28139560931899643, "train_probe_calibration/coverage@30%": 0.3445900537634408, "train_probe_calibration/coverage@5%": 0.03245967741935484, "train_probe_calibration/ece": 0.20738995295698923, "train_probe_calibration/mean_confidence": 0.7651828517025089, "train_probe_completions/clipped_ratio": 0.013715277777777776, "train_probe_completions/max_length": 2729.3333333333335, "train_probe_completions/max_terminated_length": 2729.3333333333335, "train_probe_completions/mean_length": 602.330576578776, "train_probe_completions/mean_terminated_length": 610.7609659830729, "train_probe_completions/min_length": 34.833333333333336, "train_probe_completions/min_terminated_length": 196.16666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 87226061.0, "train_probe_reward": 0.854498436053594, "train_probe_reward_std": 0.2775838126738866, "train_probe_rewards/accuracy_reward": 0.625, "train_probe_rewards/brier_reward": 0.7405163049697876, "train_probe_rewards/confidence_uniqueness_reward": 0.7529355386892954, "train_probe_rewards/format_reward": 0.984375, "train_probe_rewards/frontier_coverage_0": -0.010956409852951765, "train_probe_rewards/frontier_coverage_1": -0.010956409852951765, "train_probe_rewards/frontier_coverage_10": -0.010956409852951765, "train_probe_rewards/frontier_coverage_15": -0.010956409852951765, "train_probe_rewards/frontier_coverage_20": -0.010956409852951765, "train_probe_rewards/frontier_coverage_25": -0.010956409852951765, "train_probe_rewards/frontier_coverage_5": -0.010956409852951765, "train_probe_rewards/frontier_entropy_batch_reward": -0.984375, "train_probe_runtime": 200.0763, "train_probe_samples_per_second": 4.998, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4587673594554265, "train_probe_signal/accuracy_reward/group_std_mean": 0.48622022569179535, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8326980173587799, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.22938367972771326, "train_probe_signal/advantage_abs_mean": 0.9171395301818848, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2545117884874344, "train_probe_signal/advantage_pre_scale_std": 0.27495451271533966, "train_probe_signal/advantage_std": 0.9864658315976461, "train_probe_signal/brier_reward/centered_abs_mean": 0.23686287055412927, "train_probe_signal/brier_reward/group_std_mean": 0.28272825479507446, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08592941612005234, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.023686287303765614, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.12726762145757675, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.1612810716032982, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04609783055881659, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012726762021581331, "train_probe_signal/format_reward/centered_abs_mean": 0.029947916821887095, "train_probe_signal/format_reward/group_std_mean": 0.07942011921356122, "train_probe_signal/format_reward/group_zero_std_frac": 0.5833333482344946, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.053789831697940826, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.014973958410943547, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.0769535352786382, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.10913310199975967, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.00399844697676599, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0011004355813687046, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.0769535352786382, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.10913310199975967, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.00399844697676599, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0011004355813687046, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0769535352786382, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.10913310199975967, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.00399844697676599, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011004355813687046, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.0769535352786382, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.10913310199975967, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00399844697676599, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011004355813687046, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.0769535352786382, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.10913310199975967, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00399844697676599, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011004355813687046, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.0769535352786382, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.10913310199975967, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.00399844697676599, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011004355813687046, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.0769535352786382, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.10913310199975967, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.00399844697676599, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0011004355813687046, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.029947916821887095, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.07942011921356122, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.5833333482344946, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.01075796662674596, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0029947917792014778, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.38530740575080935, "calibration/batch_distribution_entropy": 0.7603393169444657, "calibration/buffer_distribution_entropy": 0.47986178564628323, "calibration/confidence_entropy": 0.5200688029931547, "calibration/coverage@0%": 0.004780705518839584, "calibration/coverage@1%": 0.004780705518839584, "calibration/coverage@10%": 0.008445626984808172, "calibration/coverage@15%": 0.008445626984808172, "calibration/coverage@20%": 0.019963951592138013, "calibration/coverage@25%": 0.14104588392992415, "calibration/coverage@30%": 0.4016393442622951, "calibration/coverage@5%": 0.004780705518839584, "calibration/ece": 0.17737637549373997, "calibration/mean_confidence": 0.7278365423980295, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012065972222222231, "completions/max_length": 4011.6, "completions/max_terminated_length": 4011.6, "completions/mean_length": 613.4942016601562, "completions/mean_terminated_length": 621.0005493164062, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 0.13199835002062474, "grad_norm": 0.010351327247917652, "learning_rate": 1.3221153846153848e-06, "loss": -0.0189, "num_tokens": 97374090.0, "reward": 0.8857142567634583, "reward_std": 0.18409750163555144, "rewards/accuracy_reward": 0.6035590291023254, "rewards/brier_reward": 0.7294302701950073, "rewards/confidence_uniqueness_reward": 0.8790017008781433, "rewards/format_reward": 0.9854166746139527, "rewards/frontier_coverage_0": -0.01264396500773728, "rewards/frontier_coverage_1": -0.01264396500773728, "rewards/frontier_coverage_10": -0.01264396500773728, "rewards/frontier_coverage_15": -0.01264396500773728, "rewards/frontier_coverage_20": -0.01264396500773728, "rewards/frontier_coverage_25": -0.01264396500773728, "rewards/frontier_coverage_5": -0.01264396500773728, "rewards/frontier_entropy_batch_reward": -0.6835112333297729, "signal/accuracy_reward/centered_abs_mean": 0.22823893129825593, "signal/accuracy_reward/group_std_mean": 0.2900137364864349, "signal/accuracy_reward/group_zero_std_frac": 0.2194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9254291892051697, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11411946564912796, "signal/advantage_abs_mean": 0.7338205933570862, "signal/advantage_pre_scale_abs_mean": 0.1410813570022583, "signal/advantage_pre_scale_std": 0.20234240591526031, "signal/advantage_std": 0.983665120601654, "signal/brier_reward/centered_abs_mean": 0.15796597599983214, "signal/brier_reward/group_std_mean": 0.20258763134479524, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.12823985517024994, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01579659804701805, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07788840532302857, "signal/confidence_uniqueness_reward/group_std_mean": 0.10405172556638717, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06344843953847885, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007788840495049953, "signal/format_reward/centered_abs_mean": 0.02560763880610466, "signal/format_reward/group_std_mean": 0.04888112768530846, "signal/format_reward/group_zero_std_frac": 0.800000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1036648079752922, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01280381940305233, "signal/frontier_coverage_0/centered_abs_mean": 0.08440108299255371, "signal/frontier_coverage_0/group_std_mean": 0.1161453977227211, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.009755328483879565, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0012069354532286524, "signal/frontier_coverage_1/centered_abs_mean": 0.08440108299255371, "signal/frontier_coverage_1/group_std_mean": 0.1161453977227211, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.009755328483879565, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0012069354532286524, "signal/frontier_coverage_10/centered_abs_mean": 0.08440108299255371, "signal/frontier_coverage_10/group_std_mean": 0.1161453977227211, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.009755328483879565, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012069354532286524, "signal/frontier_coverage_15/centered_abs_mean": 0.08440108299255371, "signal/frontier_coverage_15/group_std_mean": 0.1161453977227211, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.009755328483879565, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012069354532286524, "signal/frontier_coverage_20/centered_abs_mean": 0.08440108299255371, "signal/frontier_coverage_20/group_std_mean": 0.1161453977227211, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009755328483879565, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012069354532286524, "signal/frontier_coverage_25/centered_abs_mean": 0.08440108299255371, "signal/frontier_coverage_25/group_std_mean": 0.1161453977227211, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.009755328483879565, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0012069354532286524, "signal/frontier_coverage_5/centered_abs_mean": 0.08440108299255371, "signal/frontier_coverage_5/group_std_mean": 0.1161453977227211, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.009755328483879565, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0012069354532286524, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3892269194126129, "signal/frontier_entropy_batch_reward/group_std_mean": 0.46654823422431946, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.31475735306739805, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03892269358038902, "step": 55 }, { "calibration/aurc": 0.4003518482751858, "calibration/batch_distribution_entropy": 0.911201528778801, "calibration/buffer_distribution_entropy": 0.5497689485268447, "calibration/confidence_entropy": 0.5590389248971067, "calibration/coverage@0%": 0.002644209126607614, "calibration/coverage@1%": 0.002644209126607614, "calibration/coverage@10%": 0.011132272787085068, "calibration/coverage@15%": 0.15277683512130258, "calibration/coverage@20%": 0.20368842012441699, "calibration/coverage@25%": 0.20368842012441699, "calibration/coverage@30%": 0.2042161246099051, "calibration/coverage@5%": 0.002644209126607614, "calibration/ece": 0.19523114615371426, "calibration/mean_confidence": 0.589335139080694, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015104166666666674, "completions/max_length": 3861.6, "completions/max_terminated_length": 3861.6, "completions/mean_length": 621.9077392578125, "completions/mean_terminated_length": 631.4978149414062, "completions/min_length": 0.0, "completions/min_terminated_length": 162.4, "epoch": 0.14399820002249972, "grad_norm": 0.0028857546858489513, "learning_rate": 1.4423076923076922e-06, "loss": -0.0281, "num_tokens": 107635043.0, "reward": 0.909643542766571, "reward_std": 0.18056119680404664, "rewards/accuracy_reward": 0.5854166507720947, "rewards/brier_reward": 0.7150079846382141, "rewards/confidence_uniqueness_reward": 0.9329192876815796, "rewards/format_reward": 0.9831597089767456, "rewards/frontier_coverage_0": -0.022861182875931262, "rewards/frontier_coverage_1": -0.022861182875931262, "rewards/frontier_coverage_10": -0.022861182875931262, "rewards/frontier_coverage_15": -0.022861182875931262, "rewards/frontier_coverage_20": -0.022861182875931262, "rewards/frontier_coverage_25": -0.022861182875931262, "rewards/frontier_coverage_5": -0.022861182875931262, "rewards/frontier_entropy_batch_reward": -0.37148996591567995, "signal/accuracy_reward/centered_abs_mean": 0.22856987714767457, "signal/accuracy_reward/group_std_mean": 0.2946248114109039, "signal/accuracy_reward/group_zero_std_frac": 0.1972222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8809527635574341, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11428493857383729, "signal/advantage_abs_mean": 0.7478281497955322, "signal/advantage_pre_scale_abs_mean": 0.13646075129508972, "signal/advantage_pre_scale_std": 0.19674695134162903, "signal/advantage_std": 0.9837003707885742, "signal/brier_reward/centered_abs_mean": 0.18786855041980743, "signal/brier_reward/group_std_mean": 0.2366286039352417, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14518831074237823, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.018786855787038804, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042062591016292575, "signal/confidence_uniqueness_reward/group_std_mean": 0.07180588617920876, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03262592852115631, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042062591761350635, "signal/format_reward/centered_abs_mean": 0.02958984412252903, "signal/format_reward/group_std_mean": 0.057399775832891464, "signal/format_reward/group_zero_std_frac": 0.7611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11437597423791886, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014794922061264515, "signal/frontier_coverage_0/centered_abs_mean": 0.1847160905599594, "signal/frontier_coverage_0/group_std_mean": 0.24841379821300508, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.020291910134255887, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002641439996659756, "signal/frontier_coverage_1/centered_abs_mean": 0.1847160905599594, "signal/frontier_coverage_1/group_std_mean": 0.24841379821300508, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.020291910134255887, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002641439996659756, "signal/frontier_coverage_10/centered_abs_mean": 0.1847160905599594, "signal/frontier_coverage_10/group_std_mean": 0.24841379821300508, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020291910134255887, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002641439996659756, "signal/frontier_coverage_15/centered_abs_mean": 0.1847160905599594, "signal/frontier_coverage_15/group_std_mean": 0.24841379821300508, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020291910134255887, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002641439996659756, "signal/frontier_coverage_20/centered_abs_mean": 0.1847160905599594, "signal/frontier_coverage_20/group_std_mean": 0.24841379821300508, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020291910134255887, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002641439996659756, "signal/frontier_coverage_25/centered_abs_mean": 0.1847160905599594, "signal/frontier_coverage_25/group_std_mean": 0.24841379821300508, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.020291910134255887, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002641439996659756, "signal/frontier_coverage_5/centered_abs_mean": 0.1847160905599594, "signal/frontier_coverage_5/group_std_mean": 0.24841379821300508, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.020291910134255887, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002641439996659756, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39274935722351073, "signal/frontier_entropy_batch_reward/group_std_mean": 0.45597169995307923, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.30623041093349457, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03927493616938591, "step": 60 }, { "calibration/aurc": 0.3440475068274386, "calibration/batch_distribution_entropy": 0.9846613687513786, "calibration/buffer_distribution_entropy": 0.631180763920832, "calibration/confidence_entropy": 0.5175700592510253, "calibration/coverage@0%": 0.005349028881419754, "calibration/coverage@1%": 0.005349028881419754, "calibration/coverage@10%": 0.007517050561636556, "calibration/coverage@15%": 0.007517050561636556, "calibration/coverage@20%": 0.037326611292945386, "calibration/coverage@25%": 0.35507176012497793, "calibration/coverage@30%": 0.5909584488716831, "calibration/coverage@5%": 0.005349028881419754, "calibration/ece": 0.22226896945146724, "calibration/mean_confidence": 0.539453303271356, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01206597222222221, "completions/max_length": 3910.0, "completions/max_terminated_length": 3910.0, "completions/mean_length": 604.5211791992188, "completions/mean_terminated_length": 611.927783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 157.8, "epoch": 0.1559980500243747, "grad_norm": 0.0038041872903704643, "learning_rate": 1.5625e-06, "loss": -0.0194, "num_tokens": 117693175.0, "reward": 0.9383891940116882, "reward_std": 0.16168527901172638, "rewards/accuracy_reward": 0.6201388955116272, "rewards/brier_reward": 0.689646327495575, "rewards/confidence_uniqueness_reward": 0.9409290075302124, "rewards/format_reward": 0.9869791746139527, "rewards/frontier_coverage_0": -0.06118553690612316, "rewards/frontier_coverage_1": -0.06118553690612316, "rewards/frontier_coverage_10": -0.06118553690612316, "rewards/frontier_coverage_15": -0.06118553690612316, "rewards/frontier_coverage_20": -0.06118553690612316, "rewards/frontier_coverage_25": -0.06118553690612316, "rewards/frontier_coverage_5": -0.06118553690612316, "rewards/frontier_entropy_batch_reward": -0.22102709710597992, "signal/accuracy_reward/centered_abs_mean": 0.20631510019302368, "signal/accuracy_reward/group_std_mean": 0.2710012078285217, "signal/accuracy_reward/group_zero_std_frac": 0.2416666716337204, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8475908994674682, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.10315755009651184, "signal/advantage_abs_mean": 0.751226258277893, "signal/advantage_pre_scale_abs_mean": 0.12262100130319595, "signal/advantage_pre_scale_std": 0.1762317180633545, "signal/advantage_std": 0.9836538314819336, "signal/brier_reward/centered_abs_mean": 0.22580573260784148, "signal/brier_reward/group_std_mean": 0.2729759395122528, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1860074907541275, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.022580574080348016, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03142448402941227, "signal/confidence_uniqueness_reward/group_std_mean": 0.05232158675789833, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02579816021025181, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031424484681338073, "signal/format_reward/centered_abs_mean": 0.021940104477107526, "signal/format_reward/group_std_mean": 0.04096246287226677, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08990018367767334, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010970052238553763, "signal/frontier_coverage_0/centered_abs_mean": 0.2512904554605484, "signal/frontier_coverage_0/group_std_mean": 0.32389657497406005, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029607247561216354, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003593453438952565, "signal/frontier_coverage_1/centered_abs_mean": 0.2512904554605484, "signal/frontier_coverage_1/group_std_mean": 0.32389657497406005, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029607247561216354, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003593453438952565, "signal/frontier_coverage_10/centered_abs_mean": 0.2512904554605484, "signal/frontier_coverage_10/group_std_mean": 0.32389657497406005, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029607247561216354, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003593453438952565, "signal/frontier_coverage_15/centered_abs_mean": 0.2512904554605484, "signal/frontier_coverage_15/group_std_mean": 0.32389657497406005, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029607247561216354, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003593453438952565, "signal/frontier_coverage_20/centered_abs_mean": 0.2512904554605484, "signal/frontier_coverage_20/group_std_mean": 0.32389657497406005, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029607247561216354, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003593453438952565, "signal/frontier_coverage_25/centered_abs_mean": 0.2512904554605484, "signal/frontier_coverage_25/group_std_mean": 0.32389657497406005, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029607247561216354, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003593453438952565, "signal/frontier_coverage_5/centered_abs_mean": 0.2512904554605484, "signal/frontier_coverage_5/group_std_mean": 0.32389657497406005, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029607247561216354, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003593453438952565, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30838763117790224, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38390254974365234, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.25373234748840334, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03083876222372055, "step": 65 }, { "calibration/aurc": 0.37614875200348924, "calibration/batch_distribution_entropy": 0.9437000522925292, "calibration/buffer_distribution_entropy": 0.6810074171481879, "calibration/confidence_entropy": 0.48104768667204156, "calibration/coverage@0%": 0.005291005291005291, "calibration/coverage@1%": 0.005291005291005291, "calibration/coverage@10%": 0.006349206349206349, "calibration/coverage@15%": 0.014285714285714285, "calibration/coverage@20%": 0.03965483808415745, "calibration/coverage@25%": 0.08833208676140612, "calibration/coverage@30%": 0.14934841248361422, "calibration/coverage@5%": 0.005291005291005291, "calibration/ece": 0.20092050987122345, "calibration/mean_confidence": 0.6378651698286639, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011458333333333326, "completions/max_length": 3443.0, "completions/max_terminated_length": 3443.0, "completions/mean_length": 591.0729248046875, "completions/mean_terminated_length": 597.8992553710938, "completions/min_length": 0.0, "completions/min_terminated_length": 137.6, "epoch": 0.16799790002624967, "grad_norm": 0.0031007726211100817, "learning_rate": 1.682692307692308e-06, "loss": -0.0277, "num_tokens": 127580479.0, "reward": 0.9297984480857849, "reward_std": 0.162366783618927, "rewards/accuracy_reward": 0.6066840410232544, "rewards/brier_reward": 0.7150557637214661, "rewards/confidence_uniqueness_reward": 0.9357271790504456, "rewards/format_reward": 0.9878472089767456, "rewards/frontier_coverage_0": -0.018339458177797495, "rewards/frontier_coverage_1": -0.018339458177797495, "rewards/frontier_coverage_10": -0.018339458177797495, "rewards/frontier_coverage_15": -0.018339458177797495, "rewards/frontier_coverage_20": -0.018339458177797495, "rewards/frontier_coverage_25": -0.018339458177797495, "rewards/frontier_coverage_5": -0.018339458177797495, "rewards/frontier_entropy_batch_reward": -0.30709723234176634, "signal/accuracy_reward/centered_abs_mean": 0.19468858242034912, "signal/accuracy_reward/group_std_mean": 0.2581492453813553, "signal/accuracy_reward/group_zero_std_frac": 0.2611111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9012615561485291, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09734429121017456, "signal/advantage_abs_mean": 0.737975811958313, "signal/advantage_pre_scale_abs_mean": 0.12125055640935897, "signal/advantage_pre_scale_std": 0.1813569724559784, "signal/advantage_std": 0.983547055721283, "signal/brier_reward/centered_abs_mean": 0.2159047991037369, "signal/brier_reward/group_std_mean": 0.26480709910392763, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2007334202528, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02159047983586788, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.035509735345840454, "signal/confidence_uniqueness_reward/group_std_mean": 0.056942480802536014, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03271013423800469, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003550973394885659, "signal/format_reward/centered_abs_mean": 0.021148003451526166, "signal/format_reward/group_std_mean": 0.03970330134034157, "signal/format_reward/group_zero_std_frac": 0.8388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09669478535652161, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010574001725763083, "signal/frontier_coverage_0/centered_abs_mean": 0.1944323420524597, "signal/frontier_coverage_0/group_std_mean": 0.26410470604896547, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.025967657193541528, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027803825214505196, "signal/frontier_coverage_1/centered_abs_mean": 0.1944323420524597, "signal/frontier_coverage_1/group_std_mean": 0.26410470604896547, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.025967657193541528, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027803825214505196, "signal/frontier_coverage_10/centered_abs_mean": 0.1944323420524597, "signal/frontier_coverage_10/group_std_mean": 0.26410470604896547, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.025967657193541528, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027803825214505196, "signal/frontier_coverage_15/centered_abs_mean": 0.1944323420524597, "signal/frontier_coverage_15/group_std_mean": 0.26410470604896547, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025967657193541528, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027803825214505196, "signal/frontier_coverage_20/centered_abs_mean": 0.1944323420524597, "signal/frontier_coverage_20/group_std_mean": 0.26410470604896547, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025967657193541528, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027803825214505196, "signal/frontier_coverage_25/centered_abs_mean": 0.1944323420524597, "signal/frontier_coverage_25/group_std_mean": 0.26410470604896547, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025967657193541528, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027803825214505196, "signal/frontier_coverage_5/centered_abs_mean": 0.1944323420524597, "signal/frontier_coverage_5/group_std_mean": 0.26410470604896547, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.025967657193541528, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027803825214505196, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37358362078666685, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4412872850894928, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.34820364117622377, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03735836371779442, "step": 70 }, { "calibration/aurc": 0.29475290186298847, "calibration/batch_distribution_entropy": 0.9601365965846205, "calibration/buffer_distribution_entropy": 0.7126132478286568, "calibration/confidence_entropy": 0.5229362687640748, "calibration/coverage@0%": 0.0036750668074058772, "calibration/coverage@1%": 0.0036750668074058772, "calibration/coverage@10%": 0.028272827664786983, "calibration/coverage@15%": 0.15863521881757633, "calibration/coverage@20%": 0.27442664596711597, "calibration/coverage@25%": 0.5169448769362663, "calibration/coverage@30%": 0.6216779623354258, "calibration/coverage@5%": 0.0036750668074058772, "calibration/ece": 0.20640462574306961, "calibration/mean_confidence": 0.5862351896256457, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010763888888888861, "completions/max_length": 3745.6, "completions/max_terminated_length": 3745.6, "completions/mean_length": 597.4527099609375, "completions/mean_terminated_length": 604.02197265625, "completions/min_length": 0.0, "completions/min_terminated_length": 166.4, "epoch": 0.17999775002812465, "grad_norm": 0.0030304889660328627, "learning_rate": 1.8028846153846156e-06, "loss": -0.0221, "num_tokens": 137528030.0, "reward": 0.9619996666908264, "reward_std": 0.15815145373344422, "rewards/accuracy_reward": 0.6619791626930237, "rewards/brier_reward": 0.7344695806503296, "rewards/confidence_uniqueness_reward": 0.9398493289947509, "rewards/format_reward": 0.9886284828186035, "rewards/frontier_coverage_0": -0.04306319504976273, "rewards/frontier_coverage_1": -0.04306319504976273, "rewards/frontier_coverage_10": -0.04306319504976273, "rewards/frontier_coverage_15": -0.04306319504976273, "rewards/frontier_coverage_20": -0.04306319504976273, "rewards/frontier_coverage_25": -0.04306319504976273, "rewards/frontier_coverage_5": -0.04306319504976273, "rewards/frontier_entropy_batch_reward": -0.26425428688526154, "signal/accuracy_reward/centered_abs_mean": 0.2045247346162796, "signal/accuracy_reward/group_std_mean": 0.2658460080623627, "signal/accuracy_reward/group_zero_std_frac": 0.2611111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9393451929092407, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1022623673081398, "signal/advantage_abs_mean": 0.7476536273956299, "signal/advantage_pre_scale_abs_mean": 0.11897708922624588, "signal/advantage_pre_scale_std": 0.17594174146652222, "signal/advantage_std": 0.9835451006889343, "signal/brier_reward/centered_abs_mean": 0.20120880603790284, "signal/brier_reward/group_std_mean": 0.2491263210773468, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18733288943767548, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020120881125330926, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031463361158967015, "signal/confidence_uniqueness_reward/group_std_mean": 0.055449703335762025, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02855253517627716, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003146336181089282, "signal/format_reward/centered_abs_mean": 0.02031792579218745, "signal/format_reward/group_std_mean": 0.042368917539715764, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09047293290495872, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010158962896093726, "signal/frontier_coverage_0/centered_abs_mean": 0.21644563972949982, "signal/frontier_coverage_0/group_std_mean": 0.28655238151550294, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.028951628133654594, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030951724853366613, "signal/frontier_coverage_1/centered_abs_mean": 0.21644563972949982, "signal/frontier_coverage_1/group_std_mean": 0.28655238151550294, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.028951628133654594, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030951724853366613, "signal/frontier_coverage_10/centered_abs_mean": 0.21644563972949982, "signal/frontier_coverage_10/group_std_mean": 0.28655238151550294, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.028951628133654594, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030951724853366613, "signal/frontier_coverage_15/centered_abs_mean": 0.21644563972949982, "signal/frontier_coverage_15/group_std_mean": 0.28655238151550294, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.028951628133654594, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030951724853366613, "signal/frontier_coverage_20/centered_abs_mean": 0.21644563972949982, "signal/frontier_coverage_20/group_std_mean": 0.28655238151550294, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028951628133654594, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030951724853366613, "signal/frontier_coverage_25/centered_abs_mean": 0.21644563972949982, "signal/frontier_coverage_25/group_std_mean": 0.28655238151550294, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028951628133654594, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030951724853366613, "signal/frontier_coverage_5/centered_abs_mean": 0.21644563972949982, "signal/frontier_coverage_5/group_std_mean": 0.28655238151550294, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.028951628133654594, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030951724853366613, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3392821609973907, "signal/frontier_entropy_batch_reward/group_std_mean": 0.412265545129776, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.31629372835159303, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033928216248750684, "step": 75 }, { "calibration/aurc": 0.25231172767774057, "calibration/batch_distribution_entropy": 0.9542390552983331, "calibration/buffer_distribution_entropy": 0.7438964308184917, "calibration/confidence_entropy": 0.5081422037709752, "calibration/coverage@0%": 0.014464726374207117, "calibration/coverage@1%": 0.014464726374207117, "calibration/coverage@10%": 0.11062715955993954, "calibration/coverage@15%": 0.3353820244649297, "calibration/coverage@20%": 0.47027582394426304, "calibration/coverage@25%": 0.6061727802317394, "calibration/coverage@30%": 0.6764227642276422, "calibration/coverage@5%": 0.03178756101987641, "calibration/ece": 0.17584206931549817, "calibration/mean_confidence": 0.6132220308319034, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015190972222222232, "completions/max_length": 3805.4, "completions/max_terminated_length": 3805.4, "completions/mean_length": 624.1913330078125, "completions/mean_terminated_length": 633.8685424804687, "completions/min_length": 0.0, "completions/min_terminated_length": 147.2, "epoch": 0.19199760002999963, "grad_norm": 0.017383286729454994, "learning_rate": 1.9230769230769234e-06, "loss": -0.0345, "num_tokens": 147771994.0, "reward": 0.9449098944664002, "reward_std": 0.1581345945596695, "rewards/accuracy_reward": 0.6279513835906982, "rewards/brier_reward": 0.7345611333847046, "rewards/confidence_uniqueness_reward": 0.9351885437965393, "rewards/format_reward": 0.9838541746139526, "rewards/frontier_coverage_0": -0.017121723480522634, "rewards/frontier_coverage_1": -0.017121723480522634, "rewards/frontier_coverage_10": -0.017121723480522634, "rewards/frontier_coverage_15": -0.017121723480522634, "rewards/frontier_coverage_20": -0.017121723480522634, "rewards/frontier_coverage_25": -0.017121723480522634, "rewards/frontier_coverage_5": -0.017121723480522634, "rewards/frontier_entropy_batch_reward": -0.2625397890806198, "signal/accuracy_reward/centered_abs_mean": 0.19267578125, "signal/accuracy_reward/group_std_mean": 0.2536807984113693, "signal/accuracy_reward/group_zero_std_frac": 0.286111119389534, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9132581114768982, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.096337890625, "signal/advantage_abs_mean": 0.7506688952445983, "signal/advantage_pre_scale_abs_mean": 0.11927641779184342, "signal/advantage_pre_scale_std": 0.17789039611816407, "signal/advantage_std": 0.9835266828536987, "signal/brier_reward/centered_abs_mean": 0.1951048344373703, "signal/brier_reward/group_std_mean": 0.2429314970970154, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18561370372772218, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019510484114289284, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03635510839521885, "signal/confidence_uniqueness_reward/group_std_mean": 0.05895108655095101, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03440159037709236, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003635510988533497, "signal/format_reward/centered_abs_mean": 0.02534722201526165, "signal/format_reward/group_std_mean": 0.04572202190756798, "signal/format_reward/group_zero_std_frac": 0.8166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1192347913980484, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012673611007630824, "signal/frontier_coverage_0/centered_abs_mean": 0.20994635820388793, "signal/frontier_coverage_0/group_std_mean": 0.2800168454647064, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02855151817202568, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0030022328719496727, "signal/frontier_coverage_1/centered_abs_mean": 0.20994635820388793, "signal/frontier_coverage_1/group_std_mean": 0.2800168454647064, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02855151817202568, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0030022328719496727, "signal/frontier_coverage_10/centered_abs_mean": 0.20994635820388793, "signal/frontier_coverage_10/group_std_mean": 0.2800168454647064, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02855151817202568, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0030022328719496727, "signal/frontier_coverage_15/centered_abs_mean": 0.20994635820388793, "signal/frontier_coverage_15/group_std_mean": 0.2800168454647064, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02855151817202568, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0030022328719496727, "signal/frontier_coverage_20/centered_abs_mean": 0.20994635820388793, "signal/frontier_coverage_20/group_std_mean": 0.2800168454647064, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02855151817202568, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030022328719496727, "signal/frontier_coverage_25/centered_abs_mean": 0.20994635820388793, "signal/frontier_coverage_25/group_std_mean": 0.2800168454647064, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02855151817202568, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0030022328719496727, "signal/frontier_coverage_5/centered_abs_mean": 0.20994635820388793, "signal/frontier_coverage_5/group_std_mean": 0.2800168454647064, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02855151817202568, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030022328719496727, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3351067781448364, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40662684440612795, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3202349007129669, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03351067900657654, "step": 80 }, { "calibration/aurc": 0.23160324850331024, "calibration/batch_distribution_entropy": 0.9763916446165247, "calibration/buffer_distribution_entropy": 0.7679797116828759, "calibration/confidence_entropy": 0.488824354782312, "calibration/coverage@0%": 0.010098348032500199, "calibration/coverage@1%": 0.010098348032500199, "calibration/coverage@10%": 0.06652945039470493, "calibration/coverage@15%": 0.2055611553976477, "calibration/coverage@20%": 0.42201793128624443, "calibration/coverage@25%": 0.6277265455491603, "calibration/coverage@30%": 0.805941913022056, "calibration/coverage@5%": 0.010098348032500199, "calibration/ece": 0.1422296182290296, "calibration/mean_confidence": 0.5608671261986663, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011458333333333348, "completions/max_length": 3763.8, "completions/max_terminated_length": 3763.8, "completions/mean_length": 627.2774291992188, "completions/mean_terminated_length": 634.5992553710937, "completions/min_length": 0.0, "completions/min_terminated_length": 183.0, "epoch": 0.2039974500318746, "grad_norm": 0.0028041868936270475, "learning_rate": 2.043269230769231e-06, "loss": -0.0302, "num_tokens": 158085430.0, "reward": 0.9636515617370606, "reward_std": 0.15045951604843139, "rewards/accuracy_reward": 0.6599826216697693, "rewards/brier_reward": 0.7576205849647522, "rewards/confidence_uniqueness_reward": 0.9372734069824219, "rewards/format_reward": 0.9880208253860474, "rewards/frontier_coverage_0": -0.010684687085449696, "rewards/frontier_coverage_1": -0.010684687085449696, "rewards/frontier_coverage_10": -0.010684687085449696, "rewards/frontier_coverage_15": -0.010684687085449696, "rewards/frontier_coverage_20": -0.010684687085449696, "rewards/frontier_coverage_25": -0.010684687085449696, "rewards/frontier_coverage_5": -0.010684687085449696, "rewards/frontier_entropy_batch_reward": -0.28770090639591217, "signal/accuracy_reward/centered_abs_mean": 0.19462348222732545, "signal/accuracy_reward/group_std_mean": 0.25132531523704527, "signal/accuracy_reward/group_zero_std_frac": 0.30555556416511537, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0194868922233582, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09731174111366273, "signal/advantage_abs_mean": 0.7488582253456115, "signal/advantage_pre_scale_abs_mean": 0.11269197762012481, "signal/advantage_pre_scale_std": 0.17237231135368347, "signal/advantage_std": 0.9834303975105285, "signal/brier_reward/centered_abs_mean": 0.1861840546131134, "signal/brier_reward/group_std_mean": 0.23627618551254273, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19565358459949495, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01861840598285198, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033527684211730954, "signal/confidence_uniqueness_reward/group_std_mean": 0.05549739152193069, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03534887060523033, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033527683932334184, "signal/format_reward/centered_abs_mean": 0.0208984375, "signal/format_reward/group_std_mean": 0.04063469469547272, "signal/format_reward/group_zero_std_frac": 0.8305555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11059261560440063, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01044921875, "signal/frontier_coverage_0/centered_abs_mean": 0.2140215367078781, "signal/frontier_coverage_0/group_std_mean": 0.28595501780509947, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03218608051538467, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003060508007183671, "signal/frontier_coverage_1/centered_abs_mean": 0.2140215367078781, "signal/frontier_coverage_1/group_std_mean": 0.28595501780509947, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03218608051538467, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003060508007183671, "signal/frontier_coverage_10/centered_abs_mean": 0.2140215367078781, "signal/frontier_coverage_10/group_std_mean": 0.28595501780509947, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03218608051538467, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003060508007183671, "signal/frontier_coverage_15/centered_abs_mean": 0.2140215367078781, "signal/frontier_coverage_15/group_std_mean": 0.28595501780509947, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03218608051538467, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003060508007183671, "signal/frontier_coverage_20/centered_abs_mean": 0.2140215367078781, "signal/frontier_coverage_20/group_std_mean": 0.28595501780509947, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03218608051538467, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003060508007183671, "signal/frontier_coverage_25/centered_abs_mean": 0.2140215367078781, "signal/frontier_coverage_25/group_std_mean": 0.28595501780509947, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03218608051538467, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003060508007183671, "signal/frontier_coverage_5/centered_abs_mean": 0.2140215367078781, "signal/frontier_coverage_5/group_std_mean": 0.28595501780509947, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03218608051538467, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003060508007183671, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33995108008384706, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41062380075454713, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3568801164627075, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033995109423995015, "step": 85 }, { "calibration/aurc": 0.18159155784667086, "calibration/batch_distribution_entropy": 0.9715807596045813, "calibration/buffer_distribution_entropy": 0.7911530467023005, "calibration/confidence_entropy": 0.5011403682671715, "calibration/coverage@0%": 0.03679449026337346, "calibration/coverage@1%": 0.03679449026337346, "calibration/coverage@10%": 0.26407442436525363, "calibration/coverage@15%": 0.40627110602921057, "calibration/coverage@20%": 0.5997198808977326, "calibration/coverage@25%": 0.7829376909111836, "calibration/coverage@30%": 0.8591208692225413, "calibration/coverage@5%": 0.05354158197526497, "calibration/ece": 0.15576247778024233, "calibration/mean_confidence": 0.5620462523617634, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013975694444444464, "completions/max_length": 3526.8, "completions/max_terminated_length": 3526.8, "completions/mean_length": 621.3036499023438, "completions/mean_terminated_length": 630.1340087890625, "completions/min_length": 0.0, "completions/min_terminated_length": 166.2, "epoch": 0.2159973000337496, "grad_norm": 0.003203911706805229, "learning_rate": 2.1634615384615387e-06, "loss": -0.0348, "num_tokens": 168311520.0, "reward": 0.9634303450584412, "reward_std": 0.14751038253307341, "rewards/accuracy_reward": 0.6569444417953492, "rewards/brier_reward": 0.7645626664161682, "rewards/confidence_uniqueness_reward": 0.9359369039535522, "rewards/format_reward": 0.9853298544883728, "rewards/frontier_coverage_0": -0.0024734840262681246, "rewards/frontier_coverage_1": -0.0024734840262681246, "rewards/frontier_coverage_10": -0.0024734840262681246, "rewards/frontier_coverage_15": -0.0024734840262681246, "rewards/frontier_coverage_20": -0.0024734840262681246, "rewards/frontier_coverage_25": -0.0024734840262681246, "rewards/frontier_coverage_5": -0.0024734840262681246, "rewards/frontier_entropy_batch_reward": -0.2750917077064514, "signal/accuracy_reward/centered_abs_mean": 0.18245442807674409, "signal/accuracy_reward/group_std_mean": 0.2405119866132736, "signal/accuracy_reward/group_zero_std_frac": 0.3138888835906982, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9671781301498413, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09122721403837204, "signal/advantage_abs_mean": 0.7483711242675781, "signal/advantage_pre_scale_abs_mean": 0.1103176698088646, "signal/advantage_pre_scale_std": 0.17092794477939605, "signal/advantage_std": 0.9834154367446899, "signal/brier_reward/centered_abs_mean": 0.17637307345867156, "signal/brier_reward/group_std_mean": 0.22479958832263947, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18773746192455293, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017637307941913604, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036441614478826524, "signal/confidence_uniqueness_reward/group_std_mean": 0.05890980660915375, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.038740897178649904, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003644161531701684, "signal/format_reward/centered_abs_mean": 0.02457139752805233, "signal/format_reward/group_std_mean": 0.04477007761597633, "signal/format_reward/group_zero_std_frac": 0.8222222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1305002197623253, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012285698764026165, "signal/frontier_coverage_0/centered_abs_mean": 0.20906727015972137, "signal/frontier_coverage_0/group_std_mean": 0.27877257466316224, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03182048015296459, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002989662066102028, "signal/frontier_coverage_1/centered_abs_mean": 0.20906727015972137, "signal/frontier_coverage_1/group_std_mean": 0.27877257466316224, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03182048015296459, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002989662066102028, "signal/frontier_coverage_10/centered_abs_mean": 0.20906727015972137, "signal/frontier_coverage_10/group_std_mean": 0.27877257466316224, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03182048015296459, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002989662066102028, "signal/frontier_coverage_15/centered_abs_mean": 0.20906727015972137, "signal/frontier_coverage_15/group_std_mean": 0.27877257466316224, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03182048015296459, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002989662066102028, "signal/frontier_coverage_20/centered_abs_mean": 0.20906727015972137, "signal/frontier_coverage_20/group_std_mean": 0.27877257466316224, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03182048015296459, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002989662066102028, "signal/frontier_coverage_25/centered_abs_mean": 0.20906727015972137, "signal/frontier_coverage_25/group_std_mean": 0.27877257466316224, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03182048015296459, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002989662066102028, "signal/frontier_coverage_5/centered_abs_mean": 0.20906727015972137, "signal/frontier_coverage_5/group_std_mean": 0.27877257466316224, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03182048015296459, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002989662066102028, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.341404515504837, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4121077060699463, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3643798530101776, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034140453487634656, "step": 90 }, { "calibration/aurc": 0.2166042716481274, "calibration/batch_distribution_entropy": 0.9833011135389865, "calibration/buffer_distribution_entropy": 0.8075535376436263, "calibration/confidence_entropy": 0.49801110093217843, "calibration/coverage@0%": 0.016875791014919615, "calibration/coverage@1%": 0.016875791014919615, "calibration/coverage@10%": 0.3095501463732172, "calibration/coverage@15%": 0.5008451526286807, "calibration/coverage@20%": 0.605387048242923, "calibration/coverage@25%": 0.6472735880583714, "calibration/coverage@30%": 0.7001076678613718, "calibration/coverage@5%": 0.11083371426044182, "calibration/ece": 0.18083649378114502, "calibration/mean_confidence": 0.5293936765297097, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010590277777777768, "completions/max_length": 3390.8, "completions/max_terminated_length": 3390.8, "completions/mean_length": 650.1599975585938, "completions/mean_terminated_length": 657.1581176757812, "completions/min_length": 0.0, "completions/min_terminated_length": 168.6, "epoch": 0.22799715003562457, "grad_norm": 0.0029179048724472523, "learning_rate": 2.283653846153846e-06, "loss": -0.0354, "num_tokens": 178893043.0, "reward": 0.9682246446609497, "reward_std": 0.13790646493434905, "rewards/accuracy_reward": 0.6546007037162781, "rewards/brier_reward": 0.760300588607788, "rewards/confidence_uniqueness_reward": 0.9425395250320434, "rewards/format_reward": 0.9892361044883728, "rewards/frontier_coverage_0": -0.007322131656110287, "rewards/frontier_coverage_1": -0.007322131656110287, "rewards/frontier_coverage_10": -0.007322131656110287, "rewards/frontier_coverage_15": -0.007322131656110287, "rewards/frontier_coverage_20": -0.007322131656110287, "rewards/frontier_coverage_25": -0.007322131656110287, "rewards/frontier_coverage_5": -0.007322131656110287, "rewards/frontier_entropy_batch_reward": -0.2324485570192337, "signal/accuracy_reward/centered_abs_mean": 0.17507053017616273, "signal/accuracy_reward/group_std_mean": 0.23211880326271056, "signal/accuracy_reward/group_zero_std_frac": 0.33333333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9639934659004211, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08753526508808136, "signal/advantage_abs_mean": 0.7448830127716064, "signal/advantage_pre_scale_abs_mean": 0.10292523950338364, "signal/advantage_pre_scale_std": 0.15798973739147187, "signal/advantage_std": 0.9833694338798523, "signal/brier_reward/centered_abs_mean": 0.17270311415195466, "signal/brier_reward/group_std_mean": 0.21927067041397094, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1916445404291153, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017270312085747717, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02888911999762058, "signal/confidence_uniqueness_reward/group_std_mean": 0.04897563457489014, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032245057821273806, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028889121022075415, "signal/format_reward/centered_abs_mean": 0.018511285074055195, "signal/format_reward/group_std_mean": 0.03622478432953358, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10353666096925736, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009255642537027597, "signal/frontier_coverage_0/centered_abs_mean": 0.2209547370672226, "signal/frontier_coverage_0/group_std_mean": 0.2883128225803375, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03494723662734032, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003159652603790164, "signal/frontier_coverage_1/centered_abs_mean": 0.2209547370672226, "signal/frontier_coverage_1/group_std_mean": 0.2883128225803375, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03494723662734032, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003159652603790164, "signal/frontier_coverage_10/centered_abs_mean": 0.2209547370672226, "signal/frontier_coverage_10/group_std_mean": 0.2883128225803375, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03494723662734032, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.003159652603790164, "signal/frontier_coverage_15/centered_abs_mean": 0.2209547370672226, "signal/frontier_coverage_15/group_std_mean": 0.2883128225803375, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03494723662734032, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.003159652603790164, "signal/frontier_coverage_20/centered_abs_mean": 0.2209547370672226, "signal/frontier_coverage_20/group_std_mean": 0.2883128225803375, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03494723662734032, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003159652603790164, "signal/frontier_coverage_25/centered_abs_mean": 0.2209547370672226, "signal/frontier_coverage_25/group_std_mean": 0.2883128225803375, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03494723662734032, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003159652603790164, "signal/frontier_coverage_5/centered_abs_mean": 0.2209547370672226, "signal/frontier_coverage_5/group_std_mean": 0.2883128225803375, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03494723662734032, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003159652603790164, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3112953960895538, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38544243574142456, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3464753270149231, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031129539757966996, "step": 95 }, { "calibration/aurc": 0.16286997075318857, "calibration/batch_distribution_entropy": 0.9568406111615333, "calibration/buffer_distribution_entropy": 0.8212428711447959, "calibration/confidence_entropy": 0.5075458735010276, "calibration/coverage@0%": 0.04542281875223174, "calibration/coverage@1%": 0.04542281875223174, "calibration/coverage@10%": 0.22939056407012304, "calibration/coverage@15%": 0.5071482595725405, "calibration/coverage@20%": 0.7381393879589083, "calibration/coverage@25%": 0.8506309096295332, "calibration/coverage@30%": 0.9161175945633071, "calibration/coverage@5%": 0.08715547092641457, "calibration/ece": 0.13007832844366438, "calibration/mean_confidence": 0.6023370758512752, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014843750000000022, "completions/max_length": 3476.6, "completions/max_terminated_length": 3476.6, "completions/mean_length": 687.32822265625, "completions/mean_terminated_length": 697.676513671875, "completions/min_length": 0.0, "completions/min_terminated_length": 190.6, "epoch": 0.23999700003749952, "grad_norm": 0.0030280128121376038, "learning_rate": 2.403846153846154e-06, "loss": -0.0394, "num_tokens": 189910136.0, "reward": 0.972452974319458, "reward_std": 0.14463671147823334, "rewards/accuracy_reward": 0.6689236044883728, "rewards/brier_reward": 0.7945778131484985, "rewards/confidence_uniqueness_reward": 0.934015154838562, "rewards/format_reward": 0.9850694417953492, "rewards/frontier_coverage_0": 0.020587368682026864, "rewards/frontier_coverage_1": 0.020587368682026864, "rewards/frontier_coverage_10": 0.020587368682026864, "rewards/frontier_coverage_15": 0.020587368682026864, "rewards/frontier_coverage_20": 0.020587368682026864, "rewards/frontier_coverage_25": 0.020587368682026864, "rewards/frontier_coverage_5": 0.020587368682026864, "rewards/frontier_entropy_batch_reward": -0.29463626742362975, "signal/accuracy_reward/centered_abs_mean": 0.17580294609069824, "signal/accuracy_reward/group_std_mean": 0.22891995906829835, "signal/accuracy_reward/group_zero_std_frac": 0.3611111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0090148210525514, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08790147304534912, "signal/advantage_abs_mean": 0.7534759998321533, "signal/advantage_pre_scale_abs_mean": 0.10780888944864273, "signal/advantage_pre_scale_std": 0.16814400553703307, "signal/advantage_std": 0.9833277583122253, "signal/brier_reward/centered_abs_mean": 0.14416988790035248, "signal/brier_reward/group_std_mean": 0.18674043118953704, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1662220239639282, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014416989497840404, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03587759211659432, "signal/confidence_uniqueness_reward/group_std_mean": 0.05903834477066994, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041634421050548556, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003587759332731366, "signal/format_reward/centered_abs_mean": 0.02444661483168602, "signal/format_reward/group_std_mean": 0.04573269262909889, "signal/format_reward/group_zero_std_frac": 0.8111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1418396607041359, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01222330741584301, "signal/frontier_coverage_0/centered_abs_mean": 0.17305268943309784, "signal/frontier_coverage_0/group_std_mean": 0.22779336273670198, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.028698178008198738, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024746534414589404, "signal/frontier_coverage_1/centered_abs_mean": 0.17305268943309784, "signal/frontier_coverage_1/group_std_mean": 0.22779336273670198, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.028698178008198738, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024746534414589404, "signal/frontier_coverage_10/centered_abs_mean": 0.17305268943309784, "signal/frontier_coverage_10/group_std_mean": 0.22779336273670198, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.028698178008198738, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024746534414589404, "signal/frontier_coverage_15/centered_abs_mean": 0.17305268943309784, "signal/frontier_coverage_15/group_std_mean": 0.22779336273670198, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.028698178008198738, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024746534414589404, "signal/frontier_coverage_20/centered_abs_mean": 0.17305268943309784, "signal/frontier_coverage_20/group_std_mean": 0.22779336273670198, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028698178008198738, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024746534414589404, "signal/frontier_coverage_25/centered_abs_mean": 0.17305268943309784, "signal/frontier_coverage_25/group_std_mean": 0.22779336273670198, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028698178008198738, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024746534414589404, "signal/frontier_coverage_5/centered_abs_mean": 0.17305268943309784, "signal/frontier_coverage_5/group_std_mean": 0.22779336273670198, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.028698178008198738, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024746534414589404, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3312057614326477, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40025997161865234, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3819632053375244, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03312057442963123, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.16686171218946902, "eval_calibration/batch_distribution_entropy": 0.9064578842709808, "eval_calibration/buffer_distribution_entropy": 0.8274689120482418, "eval_calibration/confidence_entropy": 0.5010017723628131, "eval_calibration/coverage@0%": 0.203125, "eval_calibration/coverage@1%": 0.203125, "eval_calibration/coverage@10%": 0.3645833333333333, "eval_calibration/coverage@15%": 0.59375, "eval_calibration/coverage@20%": 0.7239583333333334, "eval_calibration/coverage@25%": 0.890625, "eval_calibration/coverage@30%": 0.9427083333333334, "eval_calibration/coverage@5%": 0.21354166666666666, "eval_calibration/ece": 0.19091592880746577, "eval_calibration/mean_confidence": 0.6266145921062726, "eval_completions/clipped_ratio": 0.012152777777777771, "eval_completions/max_length": 2540.8333333333335, "eval_completions/max_terminated_length": 2540.8333333333335, "eval_completions/mean_length": 683.2637329101562, "eval_completions/mean_terminated_length": 691.6889953613281, "eval_completions/min_length": 90.83333333333333, "eval_completions/min_terminated_length": 244.5, "eval_loss": 0.0, "eval_num_tokens": 189910136.0, "eval_reward": 0.8880608379840851, "eval_reward_std": 0.2467256337404251, "eval_rewards/accuracy_reward": 0.6467013955116272, "eval_rewards/brier_reward": 0.7946632703145345, "eval_rewards/confidence_uniqueness_reward": 0.8764048020044962, "eval_rewards/format_reward": 0.9869791666666666, "eval_rewards/frontier_coverage_0": 0.028088706807466224, "eval_rewards/frontier_coverage_1": 0.028088706807466224, "eval_rewards/frontier_coverage_10": 0.028088706807466224, "eval_rewards/frontier_coverage_15": 0.028088706807466224, "eval_rewards/frontier_coverage_20": 0.028088706807466224, "eval_rewards/frontier_coverage_25": 0.028088706807466224, "eval_rewards/frontier_coverage_5": 0.028088706807466224, "eval_rewards/frontier_entropy_batch_reward": -0.9869791666666666, "eval_runtime": 203.6327, "eval_samples_per_second": 4.911, "eval_signal/accuracy_reward/centered_abs_mean": 0.4421115467945735, "eval_signal/accuracy_reward/group_std_mean": 0.4767638146877289, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9132186075051626, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.22105577339728674, "eval_signal/advantage_abs_mean": 0.8897554278373718, "eval_signal/advantage_pre_scale_abs_mean": 0.21958689639965692, "eval_signal/advantage_pre_scale_std": 0.24578682581583658, "eval_signal/advantage_std": 0.98641636967659, "eval_signal/brier_reward/centered_abs_mean": 0.19057869911193848, "eval_signal/brier_reward/group_std_mean": 0.2465061495701472, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0786722960571448, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019057870687295992, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.058801048124829926, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09244673078258832, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024099334763983887, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005880104377865791, "eval_signal/format_reward/centered_abs_mean": 0.024576822761446238, "eval_signal/format_reward/group_std_mean": 0.05818357535948356, "eval_signal/format_reward/group_zero_std_frac": 0.7222222437461218, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.04943395716448625, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.012288411380723119, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.20176778982083002, "eval_signal/frontier_coverage_0/group_std_mean": 0.2950784166653951, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011925621423870325, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002885279362089932, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.20176778982083002, "eval_signal/frontier_coverage_1/group_std_mean": 0.2950784166653951, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011925621423870325, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002885279362089932, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.20176778982083002, "eval_signal/frontier_coverage_10/group_std_mean": 0.2950784166653951, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011925621423870325, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002885279362089932, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.20176778982083002, "eval_signal/frontier_coverage_15/group_std_mean": 0.2950784166653951, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011925621423870325, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002885279362089932, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.20176778982083002, "eval_signal/frontier_coverage_20/group_std_mean": 0.2950784166653951, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011925621423870325, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002885279362089932, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.20176778982083002, "eval_signal/frontier_coverage_25/group_std_mean": 0.2950784166653951, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.011925621423870325, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002885279362089932, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.20176778982083002, "eval_signal/frontier_coverage_5/group_std_mean": 0.2950784166653951, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.011925621423870325, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002885279362089932, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.024576822761446238, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.05818357535948356, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7222222437461218, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00988679169677198, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002457682373157392, "eval_steps_per_second": 0.029, "step": 100 }, { "epoch": 0.23999700003749952, "step": 100, "train_probe_calibration/aurc": 0.1750085707513798, "train_probe_calibration/batch_distribution_entropy": 0.8806643739281838, "train_probe_calibration/buffer_distribution_entropy": 0.8284393122825452, "train_probe_calibration/confidence_entropy": 0.47174162139538317, "train_probe_calibration/coverage@0%": 0.198252688172043, "train_probe_calibration/coverage@1%": 0.198252688172043, "train_probe_calibration/coverage@10%": 0.3370295698924732, "train_probe_calibration/coverage@15%": 0.5196572580645161, "train_probe_calibration/coverage@20%": 0.7498319892473119, "train_probe_calibration/coverage@25%": 0.8697916666666666, "train_probe_calibration/coverage@30%": 0.9635416666666666, "train_probe_calibration/coverage@5%": 0.198252688172043, "train_probe_calibration/ece": 0.21608799111205979, "train_probe_calibration/mean_confidence": 0.6526666320313913, "train_probe_completions/clipped_ratio": 0.006944444444444457, "train_probe_completions/max_length": 2153.0, "train_probe_completions/max_terminated_length": 2153.0, "train_probe_completions/mean_length": 688.56494140625, "train_probe_completions/mean_terminated_length": 693.3657430013021, "train_probe_completions/min_length": 90.16666666666667, "train_probe_completions/min_terminated_length": 219.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 189910136.0, "train_probe_reward": 0.9111224909623464, "train_probe_reward_std": 0.23705051590998968, "train_probe_rewards/accuracy_reward": 0.6892361044883728, "train_probe_rewards/brier_reward": 0.8104513386885325, "train_probe_rewards/confidence_uniqueness_reward": 0.8828965425491333, "train_probe_rewards/format_reward": 0.9895833432674408, "train_probe_rewards/frontier_coverage_0": 0.013349682888171325, "train_probe_rewards/frontier_coverage_1": 0.013349682888171325, "train_probe_rewards/frontier_coverage_10": 0.013349682888171325, "train_probe_rewards/frontier_coverage_15": 0.013349682888171325, "train_probe_rewards/frontier_coverage_20": 0.013349682888171325, "train_probe_rewards/frontier_coverage_25": 0.013349682888171325, "train_probe_rewards/frontier_coverage_5": 0.013349682888171325, "train_probe_rewards/frontier_entropy_batch_reward": -0.9895833432674408, "train_probe_runtime": 187.4827, "train_probe_samples_per_second": 5.334, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4167751719554265, "train_probe_signal/accuracy_reward/group_std_mean": 0.4626837372779846, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8901467820008596, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20838758597771326, "train_probe_signal/advantage_abs_mean": 0.8610391517480215, "train_probe_signal/advantage_pre_scale_abs_mean": 0.2044555495182673, "train_probe_signal/advantage_pre_scale_std": 0.23520567764838538, "train_probe_signal/advantage_std": 0.9864020446936289, "train_probe_signal/brier_reward/centered_abs_mean": 0.1798029119769732, "train_probe_signal/brier_reward/group_std_mean": 0.23360620439052582, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07665263985594113, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01798029150813818, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.053685990472634636, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0893898606300354, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02291724147895972, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005368599280094107, "train_probe_signal/format_reward/centered_abs_mean": 0.020182291356225807, "train_probe_signal/format_reward/group_std_mean": 0.05892556471129259, "train_probe_signal/format_reward/group_zero_std_frac": 0.6666666815678278, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.041724019683897495, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.010091145678112904, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.1969028984506925, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.2944646179676056, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0120306263367335, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002815711389606198, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.1969028984506925, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.2944646179676056, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0120306263367335, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002815711389606198, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.1969028984506925, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.2944646179676056, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0120306263367335, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002815711389606198, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1969028984506925, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.2944646179676056, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0120306263367335, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002815711389606198, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1969028984506925, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.2944646179676056, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0120306263367335, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002815711389606198, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.1969028984506925, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.2944646179676056, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0120306263367335, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002815711389606198, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.1969028984506925, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.2944646179676056, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0120306263367335, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002815711389606198, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.020182291356225807, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.05892556471129259, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6666666815678278, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.008344804247220358, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002018229104578495, "train_probe_steps_per_second": 0.032 }, { "calibration/aurc": 0.30654955711846243, "calibration/batch_distribution_entropy": 0.9495631064195795, "calibration/buffer_distribution_entropy": 0.8329547447837949, "calibration/confidence_entropy": 0.5261223218554835, "calibration/coverage@0%": 0.019454234043486847, "calibration/coverage@1%": 0.019454234043486847, "calibration/coverage@10%": 0.12956900775976188, "calibration/coverage@15%": 0.19017048072492707, "calibration/coverage@20%": 0.3706057308693712, "calibration/coverage@25%": 0.4448679341188172, "calibration/coverage@30%": 0.534387958838888, "calibration/coverage@5%": 0.05705214527064088, "calibration/ece": 0.14103971828217957, "calibration/mean_confidence": 0.6078461706080489, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01371527777777779, "completions/max_length": 3491.8, "completions/max_terminated_length": 3491.8, "completions/mean_length": 699.88525390625, "completions/mean_terminated_length": 709.6391723632812, "completions/min_length": 0.0, "completions/min_terminated_length": 198.0, "epoch": 0.2519968500393745, "grad_norm": 0.0027277623303234577, "learning_rate": 2.5240384615384618e-06, "loss": -0.0372, "num_tokens": 201049678.0, "reward": 0.9713174700737, "reward_std": 0.1342061474919319, "rewards/accuracy_reward": 0.6723958373069763, "rewards/brier_reward": 0.7931610941886902, "rewards/confidence_uniqueness_reward": 0.9331457614898682, "rewards/format_reward": 0.9860243082046509, "rewards/frontier_coverage_0": 0.009938755445182324, "rewards/frontier_coverage_1": 0.009938755445182324, "rewards/frontier_coverage_10": 0.009938755445182324, "rewards/frontier_coverage_15": 0.009938755445182324, "rewards/frontier_coverage_20": 0.009938755445182324, "rewards/frontier_coverage_25": 0.009938755445182324, "rewards/frontier_coverage_5": 0.009938755445182324, "rewards/frontier_entropy_batch_reward": -0.3151816755533218, "signal/accuracy_reward/centered_abs_mean": 0.15600043386220933, "signal/accuracy_reward/group_std_mean": 0.21429117023944855, "signal/accuracy_reward/group_zero_std_frac": 0.36111111640930177, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9249350309371949, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07800021693110466, "signal/advantage_abs_mean": 0.7351720333099365, "signal/advantage_pre_scale_abs_mean": 0.0974724218249321, "signal/advantage_pre_scale_std": 0.15650634765625, "signal/advantage_std": 0.9832755327224731, "signal/brier_reward/centered_abs_mean": 0.13505308628082274, "signal/brier_reward/group_std_mean": 0.17719172239303588, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16167917251586914, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013505308330059052, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0334759570658207, "signal/confidence_uniqueness_reward/group_std_mean": 0.05459783673286438, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04044450968503952, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033475957345217465, "signal/format_reward/centered_abs_mean": 0.02112087607383728, "signal/format_reward/group_std_mean": 0.03977926895022392, "signal/format_reward/group_zero_std_frac": 0.8361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12590970546007157, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01056043803691864, "signal/frontier_coverage_0/centered_abs_mean": 0.16347864717245103, "signal/frontier_coverage_0/group_std_mean": 0.21645487248897552, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0277607012540102, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023377447156235575, "signal/frontier_coverage_1/centered_abs_mean": 0.16347864717245103, "signal/frontier_coverage_1/group_std_mean": 0.21645487248897552, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0277607012540102, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023377447156235575, "signal/frontier_coverage_10/centered_abs_mean": 0.16347864717245103, "signal/frontier_coverage_10/group_std_mean": 0.21645487248897552, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0277607012540102, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023377447156235575, "signal/frontier_coverage_15/centered_abs_mean": 0.16347864717245103, "signal/frontier_coverage_15/group_std_mean": 0.21645487248897552, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0277607012540102, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023377447156235575, "signal/frontier_coverage_20/centered_abs_mean": 0.16347864717245103, "signal/frontier_coverage_20/group_std_mean": 0.21645487248897552, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0277607012540102, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023377447156235575, "signal/frontier_coverage_25/centered_abs_mean": 0.16347864717245103, "signal/frontier_coverage_25/group_std_mean": 0.21645487248897552, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0277607012540102, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023377447156235575, "signal/frontier_coverage_5/centered_abs_mean": 0.16347864717245103, "signal/frontier_coverage_5/group_std_mean": 0.21645487248897552, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0277607012540102, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023377447156235575, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33541697859764097, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40397828817367554, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.407314270734787, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03354169800877571, "step": 105 }, { "calibration/aurc": 0.17621004227172601, "calibration/batch_distribution_entropy": 0.9525836743555572, "calibration/buffer_distribution_entropy": 0.8424083283894106, "calibration/confidence_entropy": 0.47573754049226247, "calibration/coverage@0%": 0.03467249739791169, "calibration/coverage@1%": 0.03467249739791169, "calibration/coverage@10%": 0.27721002361313174, "calibration/coverage@15%": 0.4491039839531317, "calibration/coverage@20%": 0.554047743934082, "calibration/coverage@25%": 0.7966191406662304, "calibration/coverage@30%": 0.9115490951742627, "calibration/coverage@5%": 0.1632872569158427, "calibration/ece": 0.13985761805525604, "calibration/mean_confidence": 0.6064374692157269, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014322916666666675, "completions/max_length": 3597.0, "completions/max_terminated_length": 3597.0, "completions/mean_length": 732.68681640625, "completions/mean_terminated_length": 743.3656494140625, "completions/min_length": 0.0, "completions/min_terminated_length": 224.0, "epoch": 0.2639967000412495, "grad_norm": 0.003282015211880207, "learning_rate": 2.6442307692307696e-06, "loss": -0.0306, "num_tokens": 212598678.0, "reward": 0.9867424368858337, "reward_std": 0.13803467750549317, "rewards/accuracy_reward": 0.7007812619209289, "rewards/brier_reward": 0.7855064749717713, "rewards/confidence_uniqueness_reward": 0.9351040124893188, "rewards/format_reward": 0.9856770873069763, "rewards/frontier_coverage_0": -0.00702488785609603, "rewards/frontier_coverage_1": -0.00702488785609603, "rewards/frontier_coverage_10": -0.00702488785609603, "rewards/frontier_coverage_15": -0.00702488785609603, "rewards/frontier_coverage_20": -0.00702488785609603, "rewards/frontier_coverage_25": -0.00702488785609603, "rewards/frontier_coverage_5": -0.00702488785609603, "rewards/frontier_entropy_batch_reward": -0.27844594717025756, "signal/accuracy_reward/centered_abs_mean": 0.16480577290058135, "signal/accuracy_reward/group_std_mean": 0.22608107924461365, "signal/accuracy_reward/group_zero_std_frac": 0.3305555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9470046520233154, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08240288645029067, "signal/advantage_abs_mean": 0.7354275345802307, "signal/advantage_pre_scale_abs_mean": 0.10117111206054688, "signal/advantage_pre_scale_std": 0.16214256584644318, "signal/advantage_std": 0.9833307504653931, "signal/brier_reward/centered_abs_mean": 0.1462298572063446, "signal/brier_reward/group_std_mean": 0.18954940140247345, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16818097829818726, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014622985012829303, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03436548411846161, "signal/confidence_uniqueness_reward/group_std_mean": 0.05312940776348114, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0393658496439457, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034365484956651927, "signal/format_reward/centered_abs_mean": 0.02219509556889534, "signal/format_reward/group_std_mean": 0.03848949335515499, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1266273118555546, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01109754778444767, "signal/frontier_coverage_0/centered_abs_mean": 0.18365023136138917, "signal/frontier_coverage_0/group_std_mean": 0.24325567483901978, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030299390852451324, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026261982042342425, "signal/frontier_coverage_1/centered_abs_mean": 0.18365023136138917, "signal/frontier_coverage_1/group_std_mean": 0.24325567483901978, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030299390852451324, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026261982042342425, "signal/frontier_coverage_10/centered_abs_mean": 0.18365023136138917, "signal/frontier_coverage_10/group_std_mean": 0.24325567483901978, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030299390852451324, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026261982042342425, "signal/frontier_coverage_15/centered_abs_mean": 0.18365023136138917, "signal/frontier_coverage_15/group_std_mean": 0.24325567483901978, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030299390852451324, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026261982042342425, "signal/frontier_coverage_20/centered_abs_mean": 0.18365023136138917, "signal/frontier_coverage_20/group_std_mean": 0.24325567483901978, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030299390852451324, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026261982042342425, "signal/frontier_coverage_25/centered_abs_mean": 0.18365023136138917, "signal/frontier_coverage_25/group_std_mean": 0.24325567483901978, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030299390852451324, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026261982042342425, "signal/frontier_coverage_5/centered_abs_mean": 0.18365023136138917, "signal/frontier_coverage_5/group_std_mean": 0.24325567483901978, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030299390852451324, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026261982042342425, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3263388693332672, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3969201326370239, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.37608352303504944, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03263388872146607, "step": 110 }, { "calibration/aurc": 0.27223667379972827, "calibration/batch_distribution_entropy": 0.9693905636795204, "calibration/buffer_distribution_entropy": 0.8511333885278336, "calibration/confidence_entropy": 0.5051552336993171, "calibration/coverage@0%": 0.018523467203869926, "calibration/coverage@1%": 0.018523467203869926, "calibration/coverage@10%": 0.08575949309203403, "calibration/coverage@15%": 0.19812398692634017, "calibration/coverage@20%": 0.41265111135322263, "calibration/coverage@25%": 0.5600856339560352, "calibration/coverage@30%": 0.6692724289073688, "calibration/coverage@5%": 0.023828506991668337, "calibration/ece": 0.156115622125503, "calibration/mean_confidence": 0.5472786349034584, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017795138888888885, "completions/max_length": 3806.2, "completions/max_terminated_length": 3806.2, "completions/mean_length": 723.3151977539062, "completions/mean_terminated_length": 736.4974487304687, "completions/min_length": 0.0, "completions/min_terminated_length": 207.0, "epoch": 0.27599655004312446, "grad_norm": 0.002938604913651943, "learning_rate": 2.7644230769230775e-06, "loss": -0.0481, "num_tokens": 224010469.0, "reward": 0.9614550828933716, "reward_std": 0.14364836513996124, "rewards/accuracy_reward": 0.6545138955116272, "rewards/brier_reward": 0.7815379858016968, "rewards/confidence_uniqueness_reward": 0.9305242776870728, "rewards/format_reward": 0.9817708492279053, "rewards/frontier_coverage_0": 0.014907448133453727, "rewards/frontier_coverage_1": 0.014907448133453727, "rewards/frontier_coverage_10": 0.014907448133453727, "rewards/frontier_coverage_15": 0.014907448133453727, "rewards/frontier_coverage_20": 0.014907448133453727, "rewards/frontier_coverage_25": 0.014907448133453727, "rewards/frontier_coverage_5": 0.014907448133453727, "rewards/frontier_entropy_batch_reward": -0.29385790824890134, "signal/accuracy_reward/centered_abs_mean": 0.16838107705116273, "signal/accuracy_reward/group_std_mean": 0.21618228256702424, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9831504344940185, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08419053852558137, "signal/advantage_abs_mean": 0.7567147850990296, "signal/advantage_pre_scale_abs_mean": 0.10796858966350556, "signal/advantage_pre_scale_std": 0.1700371354818344, "signal/advantage_std": 0.9833152413368225, "signal/brier_reward/centered_abs_mean": 0.1507797509431839, "signal/brier_reward/group_std_mean": 0.1942868411540985, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1758878141641617, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015077975019812584, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.040679140388965605, "signal/confidence_uniqueness_reward/group_std_mean": 0.06572701260447503, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.047537700086832044, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004067914048209787, "signal/format_reward/centered_abs_mean": 0.02918836809694767, "signal/format_reward/group_std_mean": 0.052632787078619, "signal/format_reward/group_zero_std_frac": 0.7944444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1706949606537819, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014594184048473836, "signal/frontier_coverage_0/centered_abs_mean": 0.18486830592155457, "signal/frontier_coverage_0/group_std_mean": 0.24456796944141387, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03077918142080307, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026436167769134044, "signal/frontier_coverage_1/centered_abs_mean": 0.18486830592155457, "signal/frontier_coverage_1/group_std_mean": 0.24456796944141387, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03077918142080307, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026436167769134044, "signal/frontier_coverage_10/centered_abs_mean": 0.18486830592155457, "signal/frontier_coverage_10/group_std_mean": 0.24456796944141387, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03077918142080307, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026436167769134044, "signal/frontier_coverage_15/centered_abs_mean": 0.18486830592155457, "signal/frontier_coverage_15/group_std_mean": 0.24456796944141387, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03077918142080307, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026436167769134044, "signal/frontier_coverage_20/centered_abs_mean": 0.18486830592155457, "signal/frontier_coverage_20/group_std_mean": 0.24456796944141387, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03077918142080307, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026436167769134044, "signal/frontier_coverage_25/centered_abs_mean": 0.18486830592155457, "signal/frontier_coverage_25/group_std_mean": 0.24456796944141387, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03077918142080307, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026436167769134044, "signal/frontier_coverage_5/centered_abs_mean": 0.18486830592155457, "signal/frontier_coverage_5/group_std_mean": 0.24456796944141387, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03077918142080307, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026436167769134044, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3445523798465729, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41339404582977296, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4024454474449158, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03445524051785469, "step": 115 }, { "calibration/aurc": 0.29549355034028324, "calibration/batch_distribution_entropy": 0.9595540967587365, "calibration/buffer_distribution_entropy": 0.8601922185063469, "calibration/confidence_entropy": 0.4986714711203833, "calibration/coverage@0%": 0.031789573354127215, "calibration/coverage@1%": 0.031789573354127215, "calibration/coverage@10%": 0.1535061824663028, "calibration/coverage@15%": 0.36279937618358027, "calibration/coverage@20%": 0.4954773309568898, "calibration/coverage@25%": 0.5412442909658015, "calibration/coverage@30%": 0.5628272251308901, "calibration/coverage@5%": 0.12540659463072296, "calibration/ece": 0.18322134358492953, "calibration/mean_confidence": 0.5513955572073772, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012239583333333326, "completions/max_length": 3744.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 705.9643310546875, "completions/mean_terminated_length": 714.6814453125, "completions/min_length": 0.0, "completions/min_terminated_length": 219.2, "epoch": 0.28799640004499943, "grad_norm": 0.003744702087715268, "learning_rate": 2.8846153846153845e-06, "loss": -0.0343, "num_tokens": 235225034.0, "reward": 0.9743936657905579, "reward_std": 0.12764326333999634, "rewards/accuracy_reward": 0.669444453716278, "rewards/brier_reward": 0.7871620059013367, "rewards/confidence_uniqueness_reward": 0.9371712327003479, "rewards/format_reward": 0.9875868082046508, "rewards/frontier_coverage_0": 0.018821701966226102, "rewards/frontier_coverage_1": 0.018821701966226102, "rewards/frontier_coverage_10": 0.018821701966226102, "rewards/frontier_coverage_15": 0.018821701966226102, "rewards/frontier_coverage_20": 0.018821701966226102, "rewards/frontier_coverage_25": 0.018821701966226102, "rewards/frontier_coverage_5": 0.018821701966226102, "rewards/frontier_entropy_batch_reward": -0.28439350724220275, "signal/accuracy_reward/centered_abs_mean": 0.15498046576976776, "signal/accuracy_reward/group_std_mean": 0.207235848903656, "signal/accuracy_reward/group_zero_std_frac": 0.397222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9903906464576722, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07749023288488388, "signal/advantage_abs_mean": 0.7500714540481568, "signal/advantage_pre_scale_abs_mean": 0.09475149214267731, "signal/advantage_pre_scale_std": 0.1524016410112381, "signal/advantage_std": 0.9832012891769409, "signal/brier_reward/centered_abs_mean": 0.1421737015247345, "signal/brier_reward/group_std_mean": 0.18224007189273833, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18270126581192017, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014217370934784413, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031325727701187134, "signal/confidence_uniqueness_reward/group_std_mean": 0.05049348995089531, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04027099572122097, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031325726769864557, "signal/format_reward/centered_abs_mean": 0.01992729976773262, "signal/format_reward/group_std_mean": 0.03685051053762436, "signal/format_reward/group_zero_std_frac": 0.8472222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12836382985115052, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00996364988386631, "signal/frontier_coverage_0/centered_abs_mean": 0.18434422910213472, "signal/frontier_coverage_0/group_std_mean": 0.24150433838367463, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03388373181223869, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026361226569861173, "signal/frontier_coverage_1/centered_abs_mean": 0.18434422910213472, "signal/frontier_coverage_1/group_std_mean": 0.24150433838367463, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03388373181223869, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026361226569861173, "signal/frontier_coverage_10/centered_abs_mean": 0.18434422910213472, "signal/frontier_coverage_10/group_std_mean": 0.24150433838367463, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03388373181223869, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026361226569861173, "signal/frontier_coverage_15/centered_abs_mean": 0.18434422910213472, "signal/frontier_coverage_15/group_std_mean": 0.24150433838367463, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03388373181223869, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026361226569861173, "signal/frontier_coverage_20/centered_abs_mean": 0.18434422910213472, "signal/frontier_coverage_20/group_std_mean": 0.24150433838367463, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03388373181223869, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026361226569861173, "signal/frontier_coverage_25/centered_abs_mean": 0.18434422910213472, "signal/frontier_coverage_25/group_std_mean": 0.24150433838367463, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03388373181223869, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026361226569861173, "signal/frontier_coverage_5/centered_abs_mean": 0.18434422910213472, "signal/frontier_coverage_5/group_std_mean": 0.24150433838367463, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03388373181223869, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026361226569861173, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3292065501213074, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39916940331459044, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42209590673446656, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03292065560817718, "step": 120 }, { "calibration/aurc": 0.15347697896922433, "calibration/batch_distribution_entropy": 0.9279338995247632, "calibration/buffer_distribution_entropy": 0.8692291819371754, "calibration/confidence_entropy": 0.4797209891546649, "calibration/coverage@0%": 0.029773499567166052, "calibration/coverage@1%": 0.029773499567166052, "calibration/coverage@10%": 0.432970158601062, "calibration/coverage@15%": 0.5448622348892085, "calibration/coverage@20%": 0.6590509926674206, "calibration/coverage@25%": 0.8772514419071686, "calibration/coverage@30%": 0.957779339334832, "calibration/coverage@5%": 0.29304183072469436, "calibration/ece": 0.1280785228841864, "calibration/mean_confidence": 0.6281016420313883, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01171875, "completions/max_length": 3060.8, "completions/max_terminated_length": 3060.8, "completions/mean_length": 678.2833251953125, "completions/mean_terminated_length": 686.427001953125, "completions/min_length": 0.0, "completions/min_terminated_length": 199.6, "epoch": 0.2999962500468744, "grad_norm": 0.002649526810273528, "learning_rate": 3.0048076923076923e-06, "loss": -0.0264, "num_tokens": 246156522.0, "reward": 0.9789023041725159, "reward_std": 0.13094690144062043, "rewards/accuracy_reward": 0.6890625, "rewards/brier_reward": 0.8089505910873414, "rewards/confidence_uniqueness_reward": 0.9324007034301758, "rewards/format_reward": 0.98828125, "rewards/frontier_coverage_0": 0.020671736821532248, "rewards/frontier_coverage_1": 0.020671736821532248, "rewards/frontier_coverage_10": 0.020671736821532248, "rewards/frontier_coverage_15": 0.020671736821532248, "rewards/frontier_coverage_20": 0.020671736821532248, "rewards/frontier_coverage_25": 0.020671736821532248, "rewards/frontier_coverage_5": 0.020671736821532248, "rewards/frontier_entropy_batch_reward": -0.35973963141441345, "signal/accuracy_reward/centered_abs_mean": 0.16453993022441865, "signal/accuracy_reward/group_std_mean": 0.21474986672401428, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0882207155227661, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08226996511220933, "signal/advantage_abs_mean": 0.7664442896842957, "signal/advantage_pre_scale_abs_mean": 0.10022006183862686, "signal/advantage_pre_scale_std": 0.1579681694507599, "signal/advantage_std": 0.9831624984741211, "signal/brier_reward/centered_abs_mean": 0.13348801881074907, "signal/brier_reward/group_std_mean": 0.17074580788612365, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17603938281536102, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013348801992833614, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033481241390109065, "signal/confidence_uniqueness_reward/group_std_mean": 0.04822203889489174, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04422098770737648, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033481243066489697, "signal/format_reward/centered_abs_mean": 0.01887478269636631, "signal/format_reward/group_std_mean": 0.030514462664723397, "signal/format_reward/group_zero_std_frac": 0.8888888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12388529032468795, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009437391348183156, "signal/frontier_coverage_0/centered_abs_mean": 0.16123655140399934, "signal/frontier_coverage_0/group_std_mean": 0.21205799281597137, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030381349101662635, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002305682725273073, "signal/frontier_coverage_1/centered_abs_mean": 0.16123655140399934, "signal/frontier_coverage_1/group_std_mean": 0.21205799281597137, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030381349101662635, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002305682725273073, "signal/frontier_coverage_10/centered_abs_mean": 0.16123655140399934, "signal/frontier_coverage_10/group_std_mean": 0.21205799281597137, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030381349101662635, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002305682725273073, "signal/frontier_coverage_15/centered_abs_mean": 0.16123655140399934, "signal/frontier_coverage_15/group_std_mean": 0.21205799281597137, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030381349101662635, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002305682725273073, "signal/frontier_coverage_20/centered_abs_mean": 0.16123655140399934, "signal/frontier_coverage_20/group_std_mean": 0.21205799281597137, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030381349101662635, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002305682725273073, "signal/frontier_coverage_25/centered_abs_mean": 0.16123655140399934, "signal/frontier_coverage_25/group_std_mean": 0.21205799281597137, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030381349101662635, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002305682725273073, "signal/frontier_coverage_5/centered_abs_mean": 0.16123655140399934, "signal/frontier_coverage_5/group_std_mean": 0.21205799281597137, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030381349101662635, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002305682725273073, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35917252898216245, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4227446138858795, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47557589411735535, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03591725528240204, "step": 125 }, { "calibration/aurc": 0.24075648361334556, "calibration/batch_distribution_entropy": 0.9367937132872782, "calibration/buffer_distribution_entropy": 0.8743558081549685, "calibration/confidence_entropy": 0.48164570637542764, "calibration/coverage@0%": 0.00796905532697007, "calibration/coverage@1%": 0.00796905532697007, "calibration/coverage@10%": 0.1891113738103692, "calibration/coverage@15%": 0.2610659593280283, "calibration/coverage@20%": 0.43392440666402105, "calibration/coverage@25%": 0.5412922742747343, "calibration/coverage@30%": 0.6796504855991146, "calibration/coverage@5%": 0.00796905532697007, "calibration/ece": 0.11142523508126075, "calibration/mean_confidence": 0.6235650795376477, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.024913194444444443, "completions/max_length": 3119.8, "completions/max_terminated_length": 3119.8, "completions/mean_length": 677.76962890625, "completions/mean_terminated_length": 695.1375, "completions/min_length": 0.0, "completions/min_terminated_length": 182.0, "epoch": 0.3119961000487494, "grad_norm": 0.00250594737008214, "learning_rate": 3.125e-06, "loss": -0.0618, "num_tokens": 257089228.0, "reward": 0.9503241419792176, "reward_std": 0.1572349935770035, "rewards/accuracy_reward": 0.6512152791023255, "rewards/brier_reward": 0.7802307844161988, "rewards/confidence_uniqueness_reward": 0.9184599637985229, "rewards/format_reward": 0.9747395873069763, "rewards/frontier_coverage_0": 0.020750408340245486, "rewards/frontier_coverage_1": 0.020750408340245486, "rewards/frontier_coverage_10": 0.020750408340245486, "rewards/frontier_coverage_15": 0.020750408340245486, "rewards/frontier_coverage_20": 0.020750408340245486, "rewards/frontier_coverage_25": 0.020750408340245486, "rewards/frontier_coverage_5": 0.020750408340245486, "rewards/frontier_entropy_batch_reward": -0.34599482715129853, "signal/accuracy_reward/centered_abs_mean": 0.18407118022441865, "signal/accuracy_reward/group_std_mean": 0.2376004248857498, "signal/accuracy_reward/group_zero_std_frac": 0.3500000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0418458223342895, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09203559011220933, "signal/advantage_abs_mean": 0.7402811050415039, "signal/advantage_pre_scale_abs_mean": 0.11711515635251998, "signal/advantage_pre_scale_std": 0.18535294532775878, "signal/advantage_std": 0.9833332061767578, "signal/brier_reward/centered_abs_mean": 0.15127059519290925, "signal/brier_reward/group_std_mean": 0.19668636918067933, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1720490723848343, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015127059258520602, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05016105026006699, "signal/confidence_uniqueness_reward/group_std_mean": 0.07742284685373306, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05758904665708542, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005016105063259601, "signal/format_reward/centered_abs_mean": 0.03739691823720932, "signal/format_reward/group_std_mean": 0.06282595321536064, "signal/format_reward/group_zero_std_frac": 0.7638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21188210248947142, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01869845911860466, "signal/frontier_coverage_0/centered_abs_mean": 0.17104237377643586, "signal/frontier_coverage_0/group_std_mean": 0.22613960802555083, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02733422853052616, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024459057254716753, "signal/frontier_coverage_1/centered_abs_mean": 0.17104237377643586, "signal/frontier_coverage_1/group_std_mean": 0.22613960802555083, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02733422853052616, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024459057254716753, "signal/frontier_coverage_10/centered_abs_mean": 0.17104237377643586, "signal/frontier_coverage_10/group_std_mean": 0.22613960802555083, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02733422853052616, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024459057254716753, "signal/frontier_coverage_15/centered_abs_mean": 0.17104237377643586, "signal/frontier_coverage_15/group_std_mean": 0.22613960802555083, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02733422853052616, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024459057254716753, "signal/frontier_coverage_20/centered_abs_mean": 0.17104237377643586, "signal/frontier_coverage_20/group_std_mean": 0.22613960802555083, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02733422853052616, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024459057254716753, "signal/frontier_coverage_25/centered_abs_mean": 0.17104237377643586, "signal/frontier_coverage_25/group_std_mean": 0.22613960802555083, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02733422853052616, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024459057254716753, "signal/frontier_coverage_5/centered_abs_mean": 0.17104237377643586, "signal/frontier_coverage_5/group_std_mean": 0.22613960802555083, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02733422853052616, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024459057254716753, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3354610979557037, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4053588271141052, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38607336282730104, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03354610875248909, "step": 130 }, { "calibration/aurc": 0.20758602314082264, "calibration/batch_distribution_entropy": 0.9303398449193102, "calibration/buffer_distribution_entropy": 0.879867407291882, "calibration/confidence_entropy": 0.5022852646546221, "calibration/coverage@0%": 0.0857999849518162, "calibration/coverage@1%": 0.1547294888682653, "calibration/coverage@10%": 0.3439933023262488, "calibration/coverage@15%": 0.4155660301093286, "calibration/coverage@20%": 0.5031820825736764, "calibration/coverage@25%": 0.5919049119490418, "calibration/coverage@30%": 0.6689481417605041, "calibration/coverage@5%": 0.2552832986170347, "calibration/ece": 0.19673193883504392, "calibration/mean_confidence": 0.5709542997574626, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03003472222222221, "completions/max_length": 3558.6, "completions/max_terminated_length": 3558.6, "completions/mean_length": 646.0067749023438, "completions/mean_terminated_length": 665.8999633789062, "completions/min_length": 0.0, "completions/min_terminated_length": 185.0, "epoch": 0.32399595005062437, "grad_norm": 0.0025195139460265636, "learning_rate": 3.245192307692308e-06, "loss": -0.0755, "num_tokens": 267624250.0, "reward": 0.9493022203445435, "reward_std": 0.16051433086395264, "rewards/accuracy_reward": 0.6564236164093018, "rewards/brier_reward": 0.7611977815628052, "rewards/confidence_uniqueness_reward": 0.9178310751914978, "rewards/format_reward": 0.9690972208976746, "rewards/frontier_coverage_0": 0.002158835157752037, "rewards/frontier_coverage_1": 0.002158835157752037, "rewards/frontier_coverage_10": 0.002158835157752037, "rewards/frontier_coverage_15": 0.002158835157752037, "rewards/frontier_coverage_20": 0.002158835157752037, "rewards/frontier_coverage_25": 0.002158835157752037, "rewards/frontier_coverage_5": 0.002158835157752037, "rewards/frontier_entropy_batch_reward": -0.31577223539352417, "signal/accuracy_reward/centered_abs_mean": 0.17967664897441865, "signal/accuracy_reward/group_std_mean": 0.23678012788295746, "signal/accuracy_reward/group_zero_std_frac": 0.3250000089406967, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9074344158172607, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08983832448720933, "signal/advantage_abs_mean": 0.7302809596061707, "signal/advantage_pre_scale_abs_mean": 0.11728082150220871, "signal/advantage_pre_scale_std": 0.18725473880767823, "signal/advantage_std": 0.9834550261497498, "signal/brier_reward/centered_abs_mean": 0.15965774655342102, "signal/brier_reward/group_std_mean": 0.20320362746715545, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1627894550561905, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01596577502787113, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.057780878990888594, "signal/confidence_uniqueness_reward/group_std_mean": 0.09125057309865951, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05861600786447525, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005778087861835956, "signal/format_reward/centered_abs_mean": 0.04900173619389534, "signal/format_reward/group_std_mean": 0.08135172799229622, "signal/format_reward/group_zero_std_frac": 0.6972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.24763497412204744, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02450086809694767, "signal/frontier_coverage_0/centered_abs_mean": 0.2009547770023346, "signal/frontier_coverage_0/group_std_mean": 0.2594577521085739, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029287652298808097, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002873653150163591, "signal/frontier_coverage_1/centered_abs_mean": 0.2009547770023346, "signal/frontier_coverage_1/group_std_mean": 0.2594577521085739, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029287652298808097, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002873653150163591, "signal/frontier_coverage_10/centered_abs_mean": 0.2009547770023346, "signal/frontier_coverage_10/group_std_mean": 0.2594577521085739, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.029287652298808097, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002873653150163591, "signal/frontier_coverage_15/centered_abs_mean": 0.2009547770023346, "signal/frontier_coverage_15/group_std_mean": 0.2594577521085739, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.029287652298808097, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002873653150163591, "signal/frontier_coverage_20/centered_abs_mean": 0.2009547770023346, "signal/frontier_coverage_20/group_std_mean": 0.2594577521085739, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029287652298808097, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002873653150163591, "signal/frontier_coverage_25/centered_abs_mean": 0.2009547770023346, "signal/frontier_coverage_25/group_std_mean": 0.2594577521085739, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029287652298808097, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002873653150163591, "signal/frontier_coverage_5/centered_abs_mean": 0.2009547770023346, "signal/frontier_coverage_5/group_std_mean": 0.2594577521085739, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029287652298808097, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002873653150163591, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.338104647397995, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4068137645721436, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.34629579186439513, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033810464292764665, "step": 135 }, { "calibration/aurc": 0.18906472468836222, "calibration/batch_distribution_entropy": 0.8814205304070386, "calibration/buffer_distribution_entropy": 0.8831141026061242, "calibration/confidence_entropy": 0.4783131070096413, "calibration/coverage@0%": 0.007956662839798317, "calibration/coverage@1%": 0.007956662839798317, "calibration/coverage@10%": 0.19745358536747154, "calibration/coverage@15%": 0.3055121954284983, "calibration/coverage@20%": 0.6023303658836751, "calibration/coverage@25%": 0.8259447416874842, "calibration/coverage@30%": 0.9512643107725396, "calibration/coverage@5%": 0.05874200315393445, "calibration/ece": 0.09537625207937526, "calibration/mean_confidence": 0.6794771410097296, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028732638888888905, "completions/max_length": 3704.2, "completions/max_terminated_length": 3704.2, "completions/mean_length": 659.917626953125, "completions/mean_terminated_length": 679.4125610351563, "completions/min_length": 0.0, "completions/min_terminated_length": 190.2, "epoch": 0.33599580005249935, "grad_norm": 0.0030611190013587475, "learning_rate": 3.365384615384616e-06, "loss": -0.078, "num_tokens": 278330725.0, "reward": 0.942699670791626, "reward_std": 0.16864260733127595, "rewards/accuracy_reward": 0.6534722328186036, "rewards/brier_reward": 0.7861078023910523, "rewards/confidence_uniqueness_reward": 0.9161162137985229, "rewards/format_reward": 0.9706597089767456, "rewards/frontier_coverage_0": 0.02347471434623003, "rewards/frontier_coverage_1": 0.02347471434623003, "rewards/frontier_coverage_10": 0.02347471434623003, "rewards/frontier_coverage_15": 0.02347471434623003, "rewards/frontier_coverage_20": 0.02347471434623003, "rewards/frontier_coverage_25": 0.02347471434623003, "rewards/frontier_coverage_5": 0.02347471434623003, "rewards/frontier_entropy_batch_reward": -0.41938512921333315, "signal/accuracy_reward/centered_abs_mean": 0.17179904580116273, "signal/accuracy_reward/group_std_mean": 0.22940363883972167, "signal/accuracy_reward/group_zero_std_frac": 0.3444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9489439964294434, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08589952290058137, "signal/advantage_abs_mean": 0.7321731686592102, "signal/advantage_pre_scale_abs_mean": 0.12239420711994171, "signal/advantage_pre_scale_std": 0.19716466069221497, "signal/advantage_std": 0.9833737254142761, "signal/brier_reward/centered_abs_mean": 0.1507904589176178, "signal/brier_reward/group_std_mean": 0.1969256341457367, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16663262248039246, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015079045854508876, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06016582772135735, "signal/confidence_uniqueness_reward/group_std_mean": 0.09862598031759262, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0665690153837204, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006016582809388637, "signal/format_reward/centered_abs_mean": 0.04906684011220932, "signal/format_reward/group_std_mean": 0.08623605370521545, "signal/format_reward/group_zero_std_frac": 0.669444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.26951748728752134, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02453342005610466, "signal/frontier_coverage_0/centered_abs_mean": 0.1365742042660713, "signal/frontier_coverage_0/group_std_mean": 0.185016992688179, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02126149646937847, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019530110061168672, "signal/frontier_coverage_1/centered_abs_mean": 0.1365742042660713, "signal/frontier_coverage_1/group_std_mean": 0.185016992688179, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02126149646937847, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019530110061168672, "signal/frontier_coverage_10/centered_abs_mean": 0.1365742042660713, "signal/frontier_coverage_10/group_std_mean": 0.185016992688179, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02126149646937847, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019530110061168672, "signal/frontier_coverage_15/centered_abs_mean": 0.1365742042660713, "signal/frontier_coverage_15/group_std_mean": 0.185016992688179, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02126149646937847, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0019530110061168672, "signal/frontier_coverage_20/centered_abs_mean": 0.1365742042660713, "signal/frontier_coverage_20/group_std_mean": 0.185016992688179, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02126149646937847, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019530110061168672, "signal/frontier_coverage_25/centered_abs_mean": 0.1365742042660713, "signal/frontier_coverage_25/group_std_mean": 0.185016992688179, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02126149646937847, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019530110061168672, "signal/frontier_coverage_5/centered_abs_mean": 0.1365742042660713, "signal/frontier_coverage_5/group_std_mean": 0.185016992688179, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02126149646937847, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019530110061168672, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3603252172470093, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4246739625930786, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4002987802028656, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036032522842288014, "step": 140 }, { "calibration/aurc": 0.15697987006755532, "calibration/batch_distribution_entropy": 0.9844253334200659, "calibration/buffer_distribution_entropy": 0.8905394420507413, "calibration/confidence_entropy": 0.48629962729987214, "calibration/coverage@0%": 0.037176911659670284, "calibration/coverage@1%": 0.037176911659670284, "calibration/coverage@10%": 0.44460642204611933, "calibration/coverage@15%": 0.5577586774421538, "calibration/coverage@20%": 0.6965503392766921, "calibration/coverage@25%": 0.790469339916671, "calibration/coverage@30%": 0.8630696403110196, "calibration/coverage@5%": 0.26364939990934594, "calibration/ece": 0.16339695802470103, "calibration/mean_confidence": 0.5240559906457406, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025868055555555512, "completions/max_length": 3513.4, "completions/max_terminated_length": 3513.4, "completions/mean_length": 654.6441162109375, "completions/mean_terminated_length": 672.145263671875, "completions/min_length": 0.0, "completions/min_terminated_length": 214.4, "epoch": 0.34799565005437433, "grad_norm": 0.0024486000183969736, "learning_rate": 3.4855769230769233e-06, "loss": -0.0728, "num_tokens": 288936833.0, "reward": 0.981769073009491, "reward_std": 0.14131342768669128, "rewards/accuracy_reward": 0.7016493082046509, "rewards/brier_reward": 0.7855762124061585, "rewards/confidence_uniqueness_reward": 0.9288261771202088, "rewards/format_reward": 0.9740451335906982, "rewards/frontier_coverage_0": 0.0006571165286004543, "rewards/frontier_coverage_1": 0.0006571165286004543, "rewards/frontier_coverage_10": 0.0006571165286004543, "rewards/frontier_coverage_15": 0.0006571165286004543, "rewards/frontier_coverage_20": 0.0006571165286004543, "rewards/frontier_coverage_25": 0.0006571165286004543, "rewards/frontier_coverage_5": 0.0006571165286004543, "rewards/frontier_entropy_batch_reward": -0.2758418798446655, "signal/accuracy_reward/centered_abs_mean": 0.14620768427848815, "signal/accuracy_reward/group_std_mean": 0.1999648928642273, "signal/accuracy_reward/group_zero_std_frac": 0.40555556416511535, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.883146071434021, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07310384213924408, "signal/advantage_abs_mean": 0.7370108008384705, "signal/advantage_pre_scale_abs_mean": 0.10200686007738113, "signal/advantage_pre_scale_std": 0.1717151403427124, "signal/advantage_std": 0.9832675337791443, "signal/brier_reward/centered_abs_mean": 0.14895096719264983, "signal/brier_reward/group_std_mean": 0.19092276692390442, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1803687483072281, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014895097352564335, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04965474233031273, "signal/confidence_uniqueness_reward/group_std_mean": 0.07973605394363403, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.059922744333744046, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004965474270284176, "signal/format_reward/centered_abs_mean": 0.04082573838531971, "signal/format_reward/group_std_mean": 0.06935827732086182, "signal/format_reward/group_zero_std_frac": 0.7416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2456405758857727, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.020412869192659854, "signal/frontier_coverage_0/centered_abs_mean": 0.1881540447473526, "signal/frontier_coverage_0/group_std_mean": 0.24492749869823455, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032554148137569426, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026906028389930723, "signal/frontier_coverage_1/centered_abs_mean": 0.1881540447473526, "signal/frontier_coverage_1/group_std_mean": 0.24492749869823455, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032554148137569426, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026906028389930723, "signal/frontier_coverage_10/centered_abs_mean": 0.1881540447473526, "signal/frontier_coverage_10/group_std_mean": 0.24492749869823455, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.032554148137569426, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026906028389930723, "signal/frontier_coverage_15/centered_abs_mean": 0.1881540447473526, "signal/frontier_coverage_15/group_std_mean": 0.24492749869823455, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.032554148137569426, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0026906028389930723, "signal/frontier_coverage_20/centered_abs_mean": 0.1881540447473526, "signal/frontier_coverage_20/group_std_mean": 0.24492749869823455, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.032554148137569426, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026906028389930723, "signal/frontier_coverage_25/centered_abs_mean": 0.1881540447473526, "signal/frontier_coverage_25/group_std_mean": 0.24492749869823455, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.032554148137569426, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0026906028389930723, "signal/frontier_coverage_5/centered_abs_mean": 0.1881540447473526, "signal/frontier_coverage_5/group_std_mean": 0.24492749869823455, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032554148137569426, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026906028389930723, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3361767590045929, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4066691756248474, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40922998189926146, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0336176760494709, "step": 145 }, { "calibration/aurc": 0.1722534660782087, "calibration/batch_distribution_entropy": 0.9496003073139374, "calibration/buffer_distribution_entropy": 0.9066922407574388, "calibration/confidence_entropy": 0.48104717729150837, "calibration/coverage@0%": 0.042072927041753645, "calibration/coverage@1%": 0.042072927041753645, "calibration/coverage@10%": 0.3972435195237208, "calibration/coverage@15%": 0.5266129172586447, "calibration/coverage@20%": 0.6327406117153822, "calibration/coverage@25%": 0.7269504438656763, "calibration/coverage@30%": 0.7957203851288851, "calibration/coverage@5%": 0.17130425323208068, "calibration/ece": 0.17626248571877973, "calibration/mean_confidence": 0.5680134667175254, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015190972222222232, "completions/max_length": 3418.2, "completions/max_terminated_length": 3418.2, "completions/mean_length": 712.5080932617187, "completions/mean_terminated_length": 723.4612670898438, "completions/min_length": 0.0, "completions/min_terminated_length": 183.6, "epoch": 0.3599955000562493, "grad_norm": 0.0022300081327557564, "learning_rate": 3.605769230769231e-06, "loss": -0.0333, "num_tokens": 300255262.0, "reward": 0.9809937477111816, "reward_std": 0.13701283782720566, "rewards/accuracy_reward": 0.6916666626930237, "rewards/brier_reward": 0.804995310306549, "rewards/confidence_uniqueness_reward": 0.9330321192741394, "rewards/format_reward": 0.9847222089767456, "rewards/frontier_coverage_0": 0.020913063362240792, "rewards/frontier_coverage_1": 0.020913063362240792, "rewards/frontier_coverage_10": 0.020913063362240792, "rewards/frontier_coverage_15": 0.020913063362240792, "rewards/frontier_coverage_20": 0.020913063362240792, "rewards/frontier_coverage_25": 0.020913063362240792, "rewards/frontier_coverage_5": 0.020913063362240792, "rewards/frontier_entropy_batch_reward": -0.33096869885921476, "signal/accuracy_reward/centered_abs_mean": 0.16299913227558135, "signal/accuracy_reward/group_std_mean": 0.21699598133563996, "signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.002332079410553, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08149956613779068, "signal/advantage_abs_mean": 0.7397428989410401, "signal/advantage_pre_scale_abs_mean": 0.09951845407485962, "signal/advantage_pre_scale_std": 0.16154283583164214, "signal/advantage_std": 0.9832315325737, "signal/brier_reward/centered_abs_mean": 0.14122247993946074, "signal/brier_reward/group_std_mean": 0.1847561001777649, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17594003081321716, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014122248627245426, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03645235523581505, "signal/confidence_uniqueness_reward/group_std_mean": 0.06082368865609169, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.045828116685152055, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003645235579460859, "signal/format_reward/centered_abs_mean": 0.02452256940305233, "signal/format_reward/group_std_mean": 0.0467927910387516, "signal/format_reward/group_zero_std_frac": 0.8027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.153493233025074, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012261284701526164, "signal/frontier_coverage_0/centered_abs_mean": 0.1739561676979065, "signal/frontier_coverage_0/group_std_mean": 0.23288733959198, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030741161853075027, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002487573237158358, "signal/frontier_coverage_1/centered_abs_mean": 0.1739561676979065, "signal/frontier_coverage_1/group_std_mean": 0.23288733959198, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030741161853075027, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002487573237158358, "signal/frontier_coverage_10/centered_abs_mean": 0.1739561676979065, "signal/frontier_coverage_10/group_std_mean": 0.23288733959198, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030741161853075027, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002487573237158358, "signal/frontier_coverage_15/centered_abs_mean": 0.1739561676979065, "signal/frontier_coverage_15/group_std_mean": 0.23288733959198, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030741161853075027, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002487573237158358, "signal/frontier_coverage_20/centered_abs_mean": 0.1739561676979065, "signal/frontier_coverage_20/group_std_mean": 0.23288733959198, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030741161853075027, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002487573237158358, "signal/frontier_coverage_25/centered_abs_mean": 0.1739561676979065, "signal/frontier_coverage_25/group_std_mean": 0.23288733959198, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030741161853075027, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002487573237158358, "signal/frontier_coverage_5/centered_abs_mean": 0.1739561676979065, "signal/frontier_coverage_5/group_std_mean": 0.23288733959198, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030741161853075027, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002487573237158358, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33918721675872804, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40726816058158877, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42680872082710264, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033918721601367, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.14129311824865207, "eval_calibration/batch_distribution_entropy": 0.9104665702327397, "eval_calibration/buffer_distribution_entropy": 0.9172057151238991, "eval_calibration/confidence_entropy": 0.5052203374676412, "eval_calibration/coverage@0%": 0.25285618279569894, "eval_calibration/coverage@1%": 0.25285618279569894, "eval_calibration/coverage@10%": 0.4534610215053763, "eval_calibration/coverage@15%": 0.668010752688172, "eval_calibration/coverage@20%": 0.777385752688172, "eval_calibration/coverage@25%": 0.9151545698924731, "eval_calibration/coverage@30%": 0.9623655913978495, "eval_calibration/coverage@5%": 0.358366935483871, "eval_calibration/ece": 0.24812658669354834, "eval_calibration/mean_confidence": 0.5544641391129032, "eval_completions/clipped_ratio": 0.013715277777777776, "eval_completions/max_length": 2725.5, "eval_completions/max_terminated_length": 2725.5, "eval_completions/mean_length": 682.8199768066406, "eval_completions/mean_terminated_length": 692.2769368489584, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 232.66666666666666, "eval_loss": 0.0, "eval_num_tokens": 300255262.0, "eval_reward": 0.9028725524743398, "eval_reward_std": 0.22518395135800043, "eval_rewards/accuracy_reward": 0.6770833333333334, "eval_rewards/brier_reward": 0.7904597421487173, "eval_rewards/confidence_uniqueness_reward": 0.8859053353468577, "eval_rewards/format_reward": 0.9869791666666666, "eval_rewards/frontier_coverage_0": 0.019008085131645203, "eval_rewards/frontier_coverage_1": 0.019008085131645203, "eval_rewards/frontier_coverage_10": 0.019008085131645203, "eval_rewards/frontier_coverage_15": 0.019008085131645203, "eval_rewards/frontier_coverage_20": 0.019008085131645203, "eval_rewards/frontier_coverage_25": 0.019008085131645203, "eval_rewards/frontier_coverage_5": 0.019008085131645203, "eval_rewards/frontier_entropy_batch_reward": -0.9869791666666666, "eval_runtime": 209.653, "eval_samples_per_second": 4.77, "eval_signal/accuracy_reward/centered_abs_mean": 0.4202473958333333, "eval_signal/accuracy_reward/group_std_mean": 0.46384624640146893, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9487552146116892, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21012369791666666, "eval_signal/advantage_abs_mean": 0.8500068088372549, "eval_signal/advantage_pre_scale_abs_mean": 0.1913518731792768, "eval_signal/advantage_pre_scale_std": 0.22435899823904037, "eval_signal/advantage_std": 0.9863767127195994, "eval_signal/brier_reward/centered_abs_mean": 0.19685744742552438, "eval_signal/brier_reward/group_std_mean": 0.25481796513001126, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0891647810737292, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019685745239257812, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05400043291350206, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09014810870091121, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02440998361756404, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005400043601791064, "eval_signal/format_reward/centered_abs_mean": 0.024793836132933695, "eval_signal/format_reward/group_std_mean": 0.06416239372144143, "eval_signal/format_reward/group_zero_std_frac": 0.6666666915019354, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.05534577121337255, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.012396918066466847, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2995465894540151, "eval_signal/frontier_coverage_0/group_std_mean": 0.4160442252953847, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01938629026214282, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004283516357342402, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2995465894540151, "eval_signal/frontier_coverage_1/group_std_mean": 0.4160442252953847, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01938629026214282, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004283516357342402, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.2995465894540151, "eval_signal/frontier_coverage_10/group_std_mean": 0.4160442252953847, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01938629026214282, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004283516357342402, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.2995465894540151, "eval_signal/frontier_coverage_15/group_std_mean": 0.4160442252953847, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01938629026214282, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004283516357342402, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2995465894540151, "eval_signal/frontier_coverage_20/group_std_mean": 0.4160442252953847, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01938629026214282, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004283516357342402, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2995465894540151, "eval_signal/frontier_coverage_25/group_std_mean": 0.4160442252953847, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01938629026214282, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004283516357342402, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2995465894540151, "eval_signal/frontier_coverage_5/group_std_mean": 0.4160442252953847, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01938629026214282, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004283516357342402, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.024793836132933695, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.06416239372144143, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6666666915019354, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.011069154599681497, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002479383605532348, "eval_steps_per_second": 0.029, "step": 150 }, { "epoch": 0.3599955000562493, "step": 150, "train_probe_calibration/aurc": 0.12726604129252259, "train_probe_calibration/batch_distribution_entropy": 0.9006689267592959, "train_probe_calibration/buffer_distribution_entropy": 0.9187270539917702, "train_probe_calibration/confidence_entropy": 0.4242119471569765, "train_probe_calibration/coverage@0%": 0.31468413978494625, "train_probe_calibration/coverage@1%": 0.31468413978494625, "train_probe_calibration/coverage@10%": 0.42422715053763443, "train_probe_calibration/coverage@15%": 0.6587701612903226, "train_probe_calibration/coverage@20%": 0.7639448924731184, "train_probe_calibration/coverage@25%": 0.9104502688172044, "train_probe_calibration/coverage@30%": 0.9524529569892474, "train_probe_calibration/coverage@5%": 0.31468413978494625, "train_probe_calibration/ece": 0.2248022620967742, "train_probe_calibration/mean_confidence": 0.5427504596774193, "train_probe_completions/clipped_ratio": 0.007812500000000019, "train_probe_completions/max_length": 2014.8333333333333, "train_probe_completions/max_terminated_length": 2014.8333333333333, "train_probe_completions/mean_length": 680.6555786132812, "train_probe_completions/mean_terminated_length": 685.9972839355469, "train_probe_completions/min_length": 34.0, "train_probe_completions/min_terminated_length": 193.66666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 300255262.0, "train_probe_reward": 0.9183924396832784, "train_probe_reward_std": 0.21424931039412817, "train_probe_rewards/accuracy_reward": 0.7005208333333334, "train_probe_rewards/brier_reward": 0.8044686516125997, "train_probe_rewards/confidence_uniqueness_reward": 0.8893264333407084, "train_probe_rewards/format_reward": 0.9921875099341074, "train_probe_rewards/frontier_coverage_0": 0.01875637743311624, "train_probe_rewards/frontier_coverage_1": 0.01875637743311624, "train_probe_rewards/frontier_coverage_10": 0.01875637743311624, "train_probe_rewards/frontier_coverage_15": 0.01875637743311624, "train_probe_rewards/frontier_coverage_20": 0.01875637743311624, "train_probe_rewards/frontier_coverage_25": 0.01875637743311624, "train_probe_rewards/frontier_coverage_5": 0.01875637743311624, "train_probe_rewards/frontier_entropy_batch_reward": -0.9921875099341074, "train_probe_runtime": 194.1418, "train_probe_samples_per_second": 5.151, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4080403645833333, "train_probe_signal/accuracy_reward/group_std_mean": 0.45769575734933216, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9646410147349039, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20402018229166666, "train_probe_signal/advantage_abs_mean": 0.845906933148702, "train_probe_signal/advantage_pre_scale_abs_mean": 0.181622343758742, "train_probe_signal/advantage_pre_scale_std": 0.2128123790025711, "train_probe_signal/advantage_std": 0.9863570928573608, "train_probe_signal/brier_reward/centered_abs_mean": 0.19344795495271683, "train_probe_signal/brier_reward/group_std_mean": 0.25178463260332745, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0916658565402031, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.0193447961161534, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0479762547959884, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.0767448153346777, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022632751303414505, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004797625510642926, "train_probe_signal/format_reward/centered_abs_mean": 0.015136718284338713, "train_probe_signal/format_reward/group_std_mean": 0.044194173999130726, "train_probe_signal/format_reward/group_zero_std_frac": 0.7500000248352686, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.03515924823780855, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.007568359142169356, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.30051541328430176, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.4227322389682134, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02031859972824653, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004297370323911309, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30051541328430176, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.4227322389682134, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02031859972824653, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004297370323911309, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30051541328430176, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.4227322389682134, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02031859972824653, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004297370323911309, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.30051541328430176, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.4227322389682134, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02031859972824653, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.004297370323911309, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.30051541328430176, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.4227322389682134, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02031859972824653, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.004297370323911309, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.30051541328430176, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.4227322389682134, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02031859972824653, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004297370323911309, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30051541328430176, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.4227322389682134, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02031859972824653, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004297370323911309, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.015136718284338713, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.044194173999130726, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7500000248352686, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0070318499735246105, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0015136719254466395, "train_probe_steps_per_second": 0.031 }, { "calibration/aurc": 0.15515567262371474, "calibration/batch_distribution_entropy": 0.9796127451140773, "calibration/buffer_distribution_entropy": 0.9254467256231689, "calibration/confidence_entropy": 0.4908238893003113, "calibration/coverage@0%": 0.024257363867401783, "calibration/coverage@1%": 0.024257363867401783, "calibration/coverage@10%": 0.26837621050000254, "calibration/coverage@15%": 0.6128535749082874, "calibration/coverage@20%": 0.7565885918985946, "calibration/coverage@25%": 0.8687765224579774, "calibration/coverage@30%": 0.9395811518324606, "calibration/coverage@5%": 0.13990723124140708, "calibration/ece": 0.19067582396163238, "calibration/mean_confidence": 0.5558143400531235, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008506944444444442, "completions/max_length": 3296.2, "completions/max_terminated_length": 3296.2, "completions/mean_length": 660.3401123046875, "completions/mean_terminated_length": 666.063525390625, "completions/min_length": 0.0, "completions/min_terminated_length": 182.8, "epoch": 0.3719953500581243, "grad_norm": 0.0022886916995048523, "learning_rate": 3.725961538461539e-06, "loss": -0.0212, "num_tokens": 310970092.0, "reward": 1.0068758606910706, "reward_std": 0.12418241798877716, "rewards/accuracy_reward": 0.7279513955116272, "rewards/brier_reward": 0.8095590114593506, "rewards/confidence_uniqueness_reward": 0.9424502491950989, "rewards/format_reward": 0.9914930462837219, "rewards/frontier_coverage_0": 0.0012785772909410299, "rewards/frontier_coverage_1": 0.0012785772909410299, "rewards/frontier_coverage_10": 0.0012785772909410299, "rewards/frontier_coverage_15": 0.0012785772909410299, "rewards/frontier_coverage_20": 0.0012785772909410299, "rewards/frontier_coverage_25": 0.0019289300893433393, "rewards/frontier_coverage_5": 0.0012785772909410299, "rewards/frontier_entropy_batch_reward": -0.2818462073802948, "signal/accuracy_reward/centered_abs_mean": 0.16677517294883729, "signal/accuracy_reward/group_std_mean": 0.21808722317218782, "signal/accuracy_reward/group_zero_std_frac": 0.38611111640930174, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.116166591644287, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08338758647441864, "signal/advantage_abs_mean": 0.7497113704681396, "signal/advantage_pre_scale_abs_mean": 0.09206513911485673, "signal/advantage_pre_scale_std": 0.14799903333187103, "signal/advantage_std": 0.9831433296203613, "signal/brier_reward/centered_abs_mean": 0.13939605057239532, "signal/brier_reward/group_std_mean": 0.17993341088294984, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18695828914642335, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01393960528075695, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027320655062794685, "signal/confidence_uniqueness_reward/group_std_mean": 0.04605055823922157, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.036707811057567596, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027320656459778546, "signal/format_reward/centered_abs_mean": 0.015180121548473835, "signal/format_reward/group_std_mean": 0.0315033558756113, "signal/format_reward/group_zero_std_frac": 0.8611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10213274359703065, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007590060774236918, "signal/frontier_coverage_0/centered_abs_mean": 0.1950198382139206, "signal/frontier_coverage_0/group_std_mean": 0.255666583776474, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03742770254611969, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027887837029993536, "signal/frontier_coverage_1/centered_abs_mean": 0.1950198382139206, "signal/frontier_coverage_1/group_std_mean": 0.255666583776474, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03742770254611969, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027887837029993536, "signal/frontier_coverage_10/centered_abs_mean": 0.1950198382139206, "signal/frontier_coverage_10/group_std_mean": 0.255666583776474, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03742770254611969, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0027887837029993536, "signal/frontier_coverage_15/centered_abs_mean": 0.1950198382139206, "signal/frontier_coverage_15/group_std_mean": 0.255666583776474, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03742770254611969, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0027887837029993536, "signal/frontier_coverage_20/centered_abs_mean": 0.1950198382139206, "signal/frontier_coverage_20/group_std_mean": 0.255666583776474, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03742770254611969, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027887837029993536, "signal/frontier_coverage_25/centered_abs_mean": 0.19435930848121644, "signal/frontier_coverage_25/group_std_mean": 0.2547897040843964, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037307870388031, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0027793380431830883, "signal/frontier_coverage_5/centered_abs_mean": 0.1950198382139206, "signal/frontier_coverage_5/group_std_mean": 0.255666583776474, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03742770254611969, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027887837029993536, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.321687251329422, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3919772803783417, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4316555678844452, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0321687251329422, "step": 155 }, { "calibration/aurc": 0.13322411582750976, "calibration/batch_distribution_entropy": 0.9164133932874314, "calibration/buffer_distribution_entropy": 0.9378270413347043, "calibration/confidence_entropy": 0.49663806864050064, "calibration/coverage@0%": 0.10162749168933057, "calibration/coverage@1%": 0.16073039406400075, "calibration/coverage@10%": 0.6032914185290437, "calibration/coverage@15%": 0.6842972947671463, "calibration/coverage@20%": 0.8049335629921259, "calibration/coverage@25%": 0.847260498687664, "calibration/coverage@30%": 0.8797900262467191, "calibration/coverage@5%": 0.3701992662338055, "calibration/ece": 0.13629638618002707, "calibration/mean_confidence": 0.6444682599329354, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011979166666666674, "completions/max_length": 3493.4, "completions/max_terminated_length": 3493.4, "completions/mean_length": 676.3605834960938, "completions/mean_terminated_length": 684.583251953125, "completions/min_length": 0.0, "completions/min_terminated_length": 199.0, "epoch": 0.38399520005999926, "grad_norm": 0.0023072566837072372, "learning_rate": 3.846153846153847e-06, "loss": -0.0317, "num_tokens": 321849062.0, "reward": 0.9769848823547364, "reward_std": 0.12771027386188508, "rewards/accuracy_reward": 0.677343738079071, "rewards/brier_reward": 0.8088360667228699, "rewards/confidence_uniqueness_reward": 0.937406325340271, "rewards/format_reward": 0.9876736044883728, "rewards/frontier_coverage_0": 0.024465531995519995, "rewards/frontier_coverage_1": 0.024465531995519995, "rewards/frontier_coverage_10": 0.024465531995519995, "rewards/frontier_coverage_15": 0.024465531995519995, "rewards/frontier_coverage_20": 0.024465531995519995, "rewards/frontier_coverage_25": 0.026557547226548194, "rewards/frontier_coverage_5": 0.024465531995519995, "rewards/frontier_entropy_batch_reward": -0.326269394159317, "signal/accuracy_reward/centered_abs_mean": 0.15506184995174407, "signal/accuracy_reward/group_std_mean": 0.20306468904018402, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0282334446907044, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07753092497587204, "signal/advantage_abs_mean": 0.7636637091636658, "signal/advantage_pre_scale_abs_mean": 0.09637981355190277, "signal/advantage_pre_scale_std": 0.15364661812782288, "signal/advantage_std": 0.9831578254699707, "signal/brier_reward/centered_abs_mean": 0.1285212144255638, "signal/brier_reward/group_std_mean": 0.16701798737049103, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17068175673484803, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012852122262120246, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03288968279957771, "signal/confidence_uniqueness_reward/group_std_mean": 0.053009679168462755, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04366851449012756, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032889683730900287, "signal/format_reward/centered_abs_mean": 0.02045355923473835, "signal/format_reward/group_std_mean": 0.03811209574341774, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.13571836948394775, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010226779617369175, "signal/frontier_coverage_0/centered_abs_mean": 0.16244593560695647, "signal/frontier_coverage_0/group_std_mean": 0.21394776403903962, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030822818726301195, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023229768965393307, "signal/frontier_coverage_1/centered_abs_mean": 0.16244593560695647, "signal/frontier_coverage_1/group_std_mean": 0.21394776403903962, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030822818726301195, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023229768965393307, "signal/frontier_coverage_10/centered_abs_mean": 0.16244593560695647, "signal/frontier_coverage_10/group_std_mean": 0.21394776403903962, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030822818726301195, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023229768965393307, "signal/frontier_coverage_15/centered_abs_mean": 0.16244593560695647, "signal/frontier_coverage_15/group_std_mean": 0.21394776403903962, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030822818726301195, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0023229768965393307, "signal/frontier_coverage_20/centered_abs_mean": 0.16244593560695647, "signal/frontier_coverage_20/group_std_mean": 0.21394776403903962, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030822818726301195, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023229768965393307, "signal/frontier_coverage_25/centered_abs_mean": 0.1387791097164154, "signal/frontier_coverage_25/group_std_mean": 0.18441854119300843, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026340827718377112, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001984541164711118, "signal/frontier_coverage_5/centered_abs_mean": 0.16244593560695647, "signal/frontier_coverage_5/group_std_mean": 0.21394776403903962, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030822818726301195, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023229768965393307, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3421880781650543, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40926730036735537, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4542033314704895, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03421880826354027, "step": 160 }, { "calibration/aurc": 0.11436634074025849, "calibration/batch_distribution_entropy": 0.9617849759011884, "calibration/buffer_distribution_entropy": 0.9484377352757761, "calibration/confidence_entropy": 0.4930776484401013, "calibration/coverage@0%": 0.10334389333167812, "calibration/coverage@1%": 0.11740639333167813, "calibration/coverage@10%": 0.5556207287320357, "calibration/coverage@15%": 0.7181706536448791, "calibration/coverage@20%": 0.785286071794031, "calibration/coverage@25%": 0.8616119070998108, "calibration/coverage@30%": 0.9275821322181891, "calibration/coverage@5%": 0.4210177667394427, "calibration/ece": 0.17937487551933348, "calibration/mean_confidence": 0.5590953376917799, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015451388888888907, "completions/max_length": 3181.6, "completions/max_terminated_length": 3181.6, "completions/mean_length": 711.1984497070313, "completions/mean_terminated_length": 722.4953369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 204.8, "epoch": 0.39599505006187424, "grad_norm": 0.0024608452804386616, "learning_rate": 3.966346153846154e-06, "loss": -0.035, "num_tokens": 333181140.0, "reward": 0.9782179236412049, "reward_std": 0.12205760926008224, "rewards/accuracy_reward": 0.6736111044883728, "rewards/brier_reward": 0.8028805494308472, "rewards/confidence_uniqueness_reward": 0.9423322916030884, "rewards/format_reward": 0.9844618082046509, "rewards/frontier_coverage_0": 0.028045324282720684, "rewards/frontier_coverage_1": 0.028045324282720684, "rewards/frontier_coverage_10": 0.028045324282720684, "rewards/frontier_coverage_15": 0.028045324282720684, "rewards/frontier_coverage_20": 0.03051578577142209, "rewards/frontier_coverage_25": 0.03762260042130947, "rewards/frontier_coverage_5": 0.028045324282720684, "rewards/frontier_entropy_batch_reward": -0.2831945657730103, "signal/accuracy_reward/centered_abs_mean": 0.13511284589767455, "signal/accuracy_reward/group_std_mean": 0.18115203380584716, "signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9182628631591797, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06755642294883728, "signal/advantage_abs_mean": 0.7523734450340271, "signal/advantage_pre_scale_abs_mean": 0.09113254398107529, "signal/advantage_pre_scale_std": 0.14792825877666474, "signal/advantage_std": 0.9831262826919556, "signal/brier_reward/centered_abs_mean": 0.13245978951454163, "signal/brier_reward/group_std_mean": 0.170314958691597, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18007222414016724, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013245978951454162, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03488457053899765, "signal/confidence_uniqueness_reward/group_std_mean": 0.054613684117794034, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04710889980196953, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034884572960436344, "signal/format_reward/centered_abs_mean": 0.024462890625, "signal/format_reward/group_std_mean": 0.04212077111005783, "signal/format_reward/group_zero_std_frac": 0.8388889074325562, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.16409094184637069, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0122314453125, "signal/frontier_coverage_0/centered_abs_mean": 0.17627032101154327, "signal/frontier_coverage_0/group_std_mean": 0.22849083840847015, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034184883907437326, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025206655263900757, "signal/frontier_coverage_1/centered_abs_mean": 0.17627032101154327, "signal/frontier_coverage_1/group_std_mean": 0.22849083840847015, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034184883907437326, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025206655263900757, "signal/frontier_coverage_10/centered_abs_mean": 0.17627032101154327, "signal/frontier_coverage_10/group_std_mean": 0.22849083840847015, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.034184883907437326, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025206655263900757, "signal/frontier_coverage_15/centered_abs_mean": 0.17627032101154327, "signal/frontier_coverage_15/group_std_mean": 0.22849083840847015, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.034184883907437326, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0025206655263900757, "signal/frontier_coverage_20/centered_abs_mean": 0.16227281987667083, "signal/frontier_coverage_20/group_std_mean": 0.21120948791503907, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.031467581540346144, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002320501208305359, "signal/frontier_coverage_25/centered_abs_mean": 0.09800123274326325, "signal/frontier_coverage_25/group_std_mean": 0.1284342259168625, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018975771404802798, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014014176325872541, "signal/frontier_coverage_5/centered_abs_mean": 0.17627032101154327, "signal/frontier_coverage_5/group_std_mean": 0.22849083840847015, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034184883907437326, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025206655263900757, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3317843735218048, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40056418180465697, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4519204914569855, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03317843824625015, "step": 165 }, { "calibration/aurc": 0.12897152930722725, "calibration/batch_distribution_entropy": 0.9336085837086161, "calibration/buffer_distribution_entropy": 0.9569373573788684, "calibration/confidence_entropy": 0.47598063140135116, "calibration/coverage@0%": 0.0282175713914672, "calibration/coverage@1%": 0.0282175713914672, "calibration/coverage@10%": 0.45547725333516764, "calibration/coverage@15%": 0.6882758272706155, "calibration/coverage@20%": 0.8536569754028251, "calibration/coverage@25%": 0.9135881357982166, "calibration/coverage@30%": 0.9688458907456269, "calibration/coverage@5%": 0.19335502303391522, "calibration/ece": 0.11628936449888494, "calibration/mean_confidence": 0.629978176081961, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006857638888888884, "completions/max_length": 3500.4, "completions/max_terminated_length": 3500.4, "completions/mean_length": 651.5045166015625, "completions/mean_terminated_length": 656.08759765625, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.4079949000637492, "grad_norm": 0.0028907046653330326, "learning_rate": 4.086538461538462e-06, "loss": -0.0206, "num_tokens": 343775656.0, "reward": 1.0022429823875427, "reward_std": 0.1172142818570137, "rewards/accuracy_reward": 0.7177951335906982, "rewards/brier_reward": 0.8290866851806641, "rewards/confidence_uniqueness_reward": 0.9474273324012756, "rewards/format_reward": 0.9930555582046509, "rewards/frontier_coverage_0": 0.02630518595688045, "rewards/frontier_coverage_1": 0.02630518595688045, "rewards/frontier_coverage_10": 0.02630518595688045, "rewards/frontier_coverage_15": 0.02630518595688045, "rewards/frontier_coverage_20": 0.03013472445309162, "rewards/frontier_coverage_25": 0.05596128031611443, "rewards/frontier_coverage_5": 0.02630518595688045, "rewards/frontier_entropy_batch_reward": -0.33945736289024353, "signal/accuracy_reward/centered_abs_mean": 0.14037001132965088, "signal/accuracy_reward/group_std_mean": 0.18953485488891603, "signal/accuracy_reward/group_zero_std_frac": 0.4444444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9717832684516907, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07018500566482544, "signal/advantage_abs_mean": 0.7606682658195496, "signal/advantage_pre_scale_abs_mean": 0.08667757511138915, "signal/advantage_pre_scale_std": 0.13898763060569763, "signal/advantage_std": 0.9830931544303894, "signal/brier_reward/centered_abs_mean": 0.12069161683320999, "signal/brier_reward/group_std_mean": 0.15923964679241182, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16777142584323884, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012069161795079709, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025993842631578445, "signal/confidence_uniqueness_reward/group_std_mean": 0.04322640188038349, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035857266560196874, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002599384170025587, "signal/format_reward/centered_abs_mean": 0.012814670242369175, "signal/format_reward/group_std_mean": 0.027250981703400613, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08731953650712967, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006407335121184588, "signal/frontier_coverage_0/centered_abs_mean": 0.16332647502422332, "signal/frontier_coverage_0/group_std_mean": 0.21512860357761382, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03251932114362717, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002335568470880389, "signal/frontier_coverage_1/centered_abs_mean": 0.16332647502422332, "signal/frontier_coverage_1/group_std_mean": 0.21512860357761382, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03251932114362717, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002335568470880389, "signal/frontier_coverage_10/centered_abs_mean": 0.16332647502422332, "signal/frontier_coverage_10/group_std_mean": 0.21512860357761382, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03251932114362717, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002335568470880389, "signal/frontier_coverage_15/centered_abs_mean": 0.16332647502422332, "signal/frontier_coverage_15/group_std_mean": 0.21512860357761382, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03251932114362717, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002335568470880389, "signal/frontier_coverage_20/centered_abs_mean": 0.1249047115445137, "signal/frontier_coverage_20/group_std_mean": 0.1664392739534378, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024858567118644714, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001786137348972261, "signal/frontier_coverage_25/centered_abs_mean": 0.06625153496861458, "signal/frontier_coverage_25/group_std_mean": 0.08540613204240799, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01316972803324461, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0009473969344981015, "signal/frontier_coverage_5/centered_abs_mean": 0.16332647502422332, "signal/frontier_coverage_5/group_std_mean": 0.21512860357761382, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03251932114362717, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335568470880389, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3586002290248871, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4223356068134308, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49831503033638, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03586002364754677, "step": 170 }, { "calibration/aurc": 0.10554713818197388, "calibration/batch_distribution_entropy": 0.9680069135619707, "calibration/buffer_distribution_entropy": 0.9637635731465585, "calibration/confidence_entropy": 0.48449433471909914, "calibration/coverage@0%": 0.06158700047098672, "calibration/coverage@1%": 0.11685015836572354, "calibration/coverage@10%": 0.5591526611609484, "calibration/coverage@15%": 0.7391989335478721, "calibration/coverage@20%": 0.8493839489032611, "calibration/coverage@25%": 0.9431748076624615, "calibration/coverage@30%": 0.9935828877005347, "calibration/coverage@5%": 0.34651033746497045, "calibration/ece": 0.19750187584352247, "calibration/mean_confidence": 0.5472111987602275, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01710069444444442, "completions/max_length": 3739.6, "completions/max_terminated_length": 3739.6, "completions/mean_length": 642.9829956054688, "completions/mean_terminated_length": 654.1280395507813, "completions/min_length": 0.0, "completions/min_terminated_length": 161.8, "epoch": 0.4199947500656242, "grad_norm": 0.0027688194531947374, "learning_rate": 4.20673076923077e-06, "loss": -0.0329, "num_tokens": 354290788.0, "reward": 0.9870093107223511, "reward_std": 0.1387052059173584, "rewards/accuracy_reward": 0.694444453716278, "rewards/brier_reward": 0.8080734610557556, "rewards/confidence_uniqueness_reward": 0.9381106615066528, "rewards/format_reward": 0.9815104126930236, "rewards/frontier_coverage_0": 0.03009704500436783, "rewards/frontier_coverage_1": 0.03009704500436783, "rewards/frontier_coverage_10": 0.03009704500436783, "rewards/frontier_coverage_15": 0.029873811826109885, "rewards/frontier_coverage_20": 0.03443767204880714, "rewards/frontier_coverage_25": 0.07700650915503501, "rewards/frontier_coverage_5": 0.03009704500436783, "rewards/frontier_entropy_batch_reward": -0.2932896614074707, "signal/accuracy_reward/centered_abs_mean": 0.1675889790058136, "signal/accuracy_reward/group_std_mean": 0.22164686918258666, "signal/accuracy_reward/group_zero_std_frac": 0.3694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.03622624874115, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0837944895029068, "signal/advantage_abs_mean": 0.7442346930503845, "signal/advantage_pre_scale_abs_mean": 0.1012690544128418, "signal/advantage_pre_scale_std": 0.16532301008701325, "signal/advantage_std": 0.9832346558570861, "signal/brier_reward/centered_abs_mean": 0.1395539104938507, "signal/brier_reward/group_std_mean": 0.17974555790424346, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17347123324871064, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013955391198396682, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04072441086173058, "signal/confidence_uniqueness_reward/group_std_mean": 0.06787059977650642, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.050070621073246, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0040724413003772495, "signal/format_reward/centered_abs_mean": 0.03021375834941864, "signal/format_reward/group_std_mean": 0.055511254072189334, "signal/format_reward/group_zero_std_frac": 0.7750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18372004330158234, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01510687917470932, "signal/frontier_coverage_0/centered_abs_mean": 0.2040518641471863, "signal/frontier_coverage_0/group_std_mean": 0.26374197006225586, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03635032847523689, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029179416596889496, "signal/frontier_coverage_1/centered_abs_mean": 0.2040518641471863, "signal/frontier_coverage_1/group_std_mean": 0.26374197006225586, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03635032847523689, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029179416596889496, "signal/frontier_coverage_10/centered_abs_mean": 0.2040518641471863, "signal/frontier_coverage_10/group_std_mean": 0.26374197006225586, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03635032847523689, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0029179416596889496, "signal/frontier_coverage_15/centered_abs_mean": 0.2027619570493698, "signal/frontier_coverage_15/group_std_mean": 0.2620996594429016, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.036139412224292754, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0028994960244745015, "signal/frontier_coverage_20/centered_abs_mean": 0.11568820774555207, "signal/frontier_coverage_20/group_std_mean": 0.15245377123355866, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020674470439553262, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016543413512408733, "signal/frontier_coverage_25/centered_abs_mean": 0.07027304172515869, "signal/frontier_coverage_25/group_std_mean": 0.0893691822886467, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01256355717778206, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001004904485307634, "signal/frontier_coverage_5/centered_abs_mean": 0.2040518641471863, "signal/frontier_coverage_5/group_std_mean": 0.26374197006225586, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03635032847523689, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0029179416596889496, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3387927234172821, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4076977729797363, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4237474262714386, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03387927338480949, "step": 175 }, { "calibration/aurc": 0.10984275074604351, "calibration/batch_distribution_entropy": 0.9317807197735644, "calibration/buffer_distribution_entropy": 0.9691957594816449, "calibration/confidence_entropy": 0.5229593459895739, "calibration/coverage@0%": 0.09351867236232028, "calibration/coverage@1%": 0.10664203194237279, "calibration/coverage@10%": 0.6353146219608317, "calibration/coverage@15%": 0.7640279202423726, "calibration/coverage@20%": 0.857970503140216, "calibration/coverage@25%": 0.9141302660362711, "calibration/coverage@30%": 0.9506386281843202, "calibration/coverage@5%": 0.3119994714169439, "calibration/ece": 0.15585108375284248, "calibration/mean_confidence": 0.6108854745161485, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020833333333333325, "completions/max_length": 3743.2, "completions/max_terminated_length": 3743.2, "completions/mean_length": 588.3677001953125, "completions/mean_terminated_length": 600.9534301757812, "completions/min_length": 0.0, "completions/min_terminated_length": 177.8, "epoch": 0.4319946000674992, "grad_norm": 0.0034114145673811436, "learning_rate": 4.326923076923077e-06, "loss": -0.0492, "num_tokens": 364168752.0, "reward": 0.9856754302978515, "reward_std": 0.1391789510846138, "rewards/accuracy_reward": 0.7072048664093018, "rewards/brier_reward": 0.8044279932975769, "rewards/confidence_uniqueness_reward": 0.9358467817306518, "rewards/format_reward": 0.9791666626930237, "rewards/frontier_coverage_0": 0.003940967842936516, "rewards/frontier_coverage_1": 0.003940967842936516, "rewards/frontier_coverage_10": 0.003940967842936516, "rewards/frontier_coverage_15": 0.005169083643704653, "rewards/frontier_coverage_20": 0.02603294886648655, "rewards/frontier_coverage_25": 0.08643961250782013, "rewards/frontier_coverage_5": 0.003940967842936516, "rewards/frontier_entropy_batch_reward": -0.3344554424285889, "signal/accuracy_reward/centered_abs_mean": 0.14794379472732544, "signal/accuracy_reward/group_std_mean": 0.1974938452243805, "signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9245458722114563, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07397189736366272, "signal/advantage_abs_mean": 0.7475542187690735, "signal/advantage_pre_scale_abs_mean": 0.10126374959945679, "signal/advantage_pre_scale_std": 0.16907794177532195, "signal/advantage_std": 0.9832310676574707, "signal/brier_reward/centered_abs_mean": 0.12277870327234268, "signal/brier_reward/group_std_mean": 0.16272760629653932, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15394430458545685, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012277870066463947, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04358867034316063, "signal/confidence_uniqueness_reward/group_std_mean": 0.07177197933197021, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05445105582475662, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004358867136761546, "signal/format_reward/centered_abs_mean": 0.03280164897441864, "signal/format_reward/group_std_mean": 0.05909553095698357, "signal/format_reward/group_zero_std_frac": 0.7666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2044661432504654, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01640082448720932, "signal/frontier_coverage_0/centered_abs_mean": 0.15614522099494935, "signal/frontier_coverage_0/group_std_mean": 0.20503021478652955, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.027985046431422234, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002232876745983958, "signal/frontier_coverage_1/centered_abs_mean": 0.15614522099494935, "signal/frontier_coverage_1/group_std_mean": 0.20503021478652955, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.027985046431422234, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002232876745983958, "signal/frontier_coverage_10/centered_abs_mean": 0.15614522099494935, "signal/frontier_coverage_10/group_std_mean": 0.20503021478652955, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027985046431422234, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002232876745983958, "signal/frontier_coverage_15/centered_abs_mean": 0.14456919133663176, "signal/frontier_coverage_15/group_std_mean": 0.19029048085212708, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02592291831970215, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0020673394668847323, "signal/frontier_coverage_20/centered_abs_mean": 0.06173940449953079, "signal/frontier_coverage_20/group_std_mean": 0.08230031579732895, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01108500100672245, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008828734629787504, "signal/frontier_coverage_25/centered_abs_mean": 0.07278510332107543, "signal/frontier_coverage_25/group_std_mean": 0.09394310265779496, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013049699179828168, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0010408269474282861, "signal/frontier_coverage_5/centered_abs_mean": 0.15614522099494935, "signal/frontier_coverage_5/group_std_mean": 0.20503021478652955, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.027985046431422234, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002232876745983958, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34572470784187315, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41141175031661986, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4329352915287018, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03457247242331505, "step": 180 }, { "calibration/aurc": 0.15111109398849837, "calibration/batch_distribution_entropy": 0.9566626630611218, "calibration/buffer_distribution_entropy": 0.9735827817632021, "calibration/confidence_entropy": 0.48504004275740165, "calibration/coverage@0%": 0.04806912259147791, "calibration/coverage@1%": 0.04806912259147791, "calibration/coverage@10%": 0.3392314701725535, "calibration/coverage@15%": 0.5697117101815119, "calibration/coverage@20%": 0.8393673770355757, "calibration/coverage@25%": 0.9303207856198131, "calibration/coverage@30%": 0.9753280839895012, "calibration/coverage@5%": 0.0841373631284347, "calibration/ece": 0.18843498871215267, "calibration/mean_confidence": 0.5700071762667023, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006944444444444464, "completions/max_length": 3202.2, "completions/max_terminated_length": 3202.2, "completions/mean_length": 623.2223266601562, "completions/mean_terminated_length": 627.6921752929687, "completions/min_length": 0.0, "completions/min_terminated_length": 171.8, "epoch": 0.44399445006937416, "grad_norm": 0.003752121701836586, "learning_rate": 4.447115384615385e-06, "loss": -0.0148, "num_tokens": 374438321.0, "reward": 0.9892779350280761, "reward_std": 0.12258573472499848, "rewards/accuracy_reward": 0.6779513835906983, "rewards/brier_reward": 0.8098243832588196, "rewards/confidence_uniqueness_reward": 0.9467748641967774, "rewards/format_reward": 0.9930555582046509, "rewards/frontier_coverage_0": 0.03621828258037567, "rewards/frontier_coverage_1": 0.03621828258037567, "rewards/frontier_coverage_10": 0.03621828258037567, "rewards/frontier_coverage_15": 0.03810288608074188, "rewards/frontier_coverage_20": 0.0494185097515583, "rewards/frontier_coverage_25": 0.10902320891618729, "rewards/frontier_coverage_5": 0.03621828258037567, "rewards/frontier_entropy_batch_reward": -0.26767775118350984, "signal/accuracy_reward/centered_abs_mean": 0.15885416567325591, "signal/accuracy_reward/group_std_mean": 0.21068883836269378, "signal/accuracy_reward/group_zero_std_frac": 0.4, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.08146892786026, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07942708283662796, "signal/advantage_abs_mean": 0.753273355960846, "signal/advantage_pre_scale_abs_mean": 0.0918624609708786, "signal/advantage_pre_scale_std": 0.14460960030555725, "signal/advantage_std": 0.9831261992454529, "signal/brier_reward/centered_abs_mean": 0.12728632986545563, "signal/brier_reward/group_std_mean": 0.1645742654800415, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17356542944908143, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012728632800281048, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024496791139245033, "signal/confidence_uniqueness_reward/group_std_mean": 0.039763347059488294, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.033292872831225395, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002449679165147245, "signal/format_reward/centered_abs_mean": 0.012217881716787815, "signal/format_reward/group_std_mean": 0.024743243120610714, "signal/format_reward/group_zero_std_frac": 0.8916666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08302046582102776, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006108940858393907, "signal/frontier_coverage_0/centered_abs_mean": 0.18843339681625365, "signal/frontier_coverage_0/group_std_mean": 0.24661438167095184, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03683679588139057, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026945976074784993, "signal/frontier_coverage_1/centered_abs_mean": 0.18843339681625365, "signal/frontier_coverage_1/group_std_mean": 0.24661438167095184, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03683679588139057, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026945976074784993, "signal/frontier_coverage_10/centered_abs_mean": 0.18843339681625365, "signal/frontier_coverage_10/group_std_mean": 0.24661438167095184, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03683679588139057, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0026945976074784993, "signal/frontier_coverage_15/centered_abs_mean": 0.1579059839248657, "signal/frontier_coverage_15/group_std_mean": 0.20779311954975127, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.030845557898283006, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002258055657148361, "signal/frontier_coverage_20/centered_abs_mean": 0.06493410244584083, "signal/frontier_coverage_20/group_std_mean": 0.08309292197227477, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012669848836958408, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009285576525144279, "signal/frontier_coverage_25/centered_abs_mean": 0.07968129962682724, "signal/frontier_coverage_25/group_std_mean": 0.10209451913833618, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015482756868004799, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0011394425993785263, "signal/frontier_coverage_5/centered_abs_mean": 0.18843339681625365, "signal/frontier_coverage_5/group_std_mean": 0.24661438167095184, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03683679588139057, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026945976074784993, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32011584639549256, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3904758870601654, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4340623140335083, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03201158419251442, "step": 185 }, { "calibration/aurc": 0.14893818169032144, "calibration/batch_distribution_entropy": 0.9419705194159368, "calibration/buffer_distribution_entropy": 0.9776862355785289, "calibration/confidence_entropy": 0.46790947060759625, "calibration/coverage@0%": 0.028810999020120808, "calibration/coverage@1%": 0.08416347943787539, "calibration/coverage@10%": 0.36775327198894214, "calibration/coverage@15%": 0.48721496613020426, "calibration/coverage@20%": 0.6907456911063473, "calibration/coverage@25%": 0.9141684616811642, "calibration/coverage@30%": 0.9921424743373046, "calibration/coverage@5%": 0.2703035088939899, "calibration/ece": 0.169163026326467, "calibration/mean_confidence": 0.6032991320164142, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004166666666666652, "completions/max_length": 3103.0, "completions/max_terminated_length": 3103.0, "completions/mean_length": 674.9318725585938, "completions/mean_terminated_length": 677.7634643554687, "completions/min_length": 0.0, "completions/min_terminated_length": 210.6, "epoch": 0.45599430007124914, "grad_norm": 0.004520408343523741, "learning_rate": 4.567307692307692e-06, "loss": 0.0002, "num_tokens": 385296480.0, "reward": 1.0066398620605468, "reward_std": 0.119065323472023, "rewards/accuracy_reward": 0.71484375, "rewards/brier_reward": 0.8188209176063538, "rewards/confidence_uniqueness_reward": 0.9446268916130066, "rewards/format_reward": 0.9955729126930237, "rewards/frontier_coverage_0": 0.02538626336026937, "rewards/frontier_coverage_1": 0.02538626336026937, "rewards/frontier_coverage_10": 0.02538626336026937, "rewards/frontier_coverage_15": 0.03268268760293722, "rewards/frontier_coverage_20": 0.06400988847017289, "rewards/frontier_coverage_25": 0.13977613300085068, "rewards/frontier_coverage_5": 0.02538626336026937, "rewards/frontier_entropy_batch_reward": -0.2974688410758972, "signal/accuracy_reward/centered_abs_mean": 0.1567545562982559, "signal/accuracy_reward/group_std_mean": 0.2077132999897003, "signal/accuracy_reward/group_zero_std_frac": 0.4000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0586259484291076, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07837727814912795, "signal/advantage_abs_mean": 0.7523825168609619, "signal/advantage_pre_scale_abs_mean": 0.08916229903697967, "signal/advantage_pre_scale_std": 0.1393636554479599, "signal/advantage_std": 0.9831319808959961, "signal/brier_reward/centered_abs_mean": 0.12696570456027984, "signal/brier_reward/group_std_mean": 0.1662917345762253, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17168731689453126, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01269657090306282, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021785502135753632, "signal/confidence_uniqueness_reward/group_std_mean": 0.0352114200592041, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02974345088005066, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021785502322018147, "signal/format_reward/centered_abs_mean": 0.008241102332249284, "signal/format_reward/group_std_mean": 0.018836847506463528, "signal/format_reward/group_zero_std_frac": 0.9111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05659685656428337, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004120551166124642, "signal/frontier_coverage_0/centered_abs_mean": 0.1937331348657608, "signal/frontier_coverage_0/group_std_mean": 0.25190245509147646, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037381567806005475, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002770383842289448, "signal/frontier_coverage_1/centered_abs_mean": 0.1937331348657608, "signal/frontier_coverage_1/group_std_mean": 0.25190245509147646, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037381567806005475, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002770383842289448, "signal/frontier_coverage_10/centered_abs_mean": 0.1937331348657608, "signal/frontier_coverage_10/group_std_mean": 0.25190245509147646, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.037381567806005475, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002770383842289448, "signal/frontier_coverage_15/centered_abs_mean": 0.1363177239894867, "signal/frontier_coverage_15/group_std_mean": 0.17888247072696686, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026297363638877868, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001949343434534967, "signal/frontier_coverage_20/centered_abs_mean": 0.06703521385788917, "signal/frontier_coverage_20/group_std_mean": 0.08440038710832595, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013008480705320836, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009586036554537714, "signal/frontier_coverage_25/centered_abs_mean": 0.09278584271669388, "signal/frontier_coverage_25/group_std_mean": 0.11911198645830154, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01804537754505873, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013268375769257545, "signal/frontier_coverage_5/centered_abs_mean": 0.1937331348657608, "signal/frontier_coverage_5/group_std_mean": 0.25190245509147646, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037381567806005475, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002770383842289448, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34055811166763306, "signal/frontier_entropy_batch_reward/group_std_mean": 0.409438556432724, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4618579685688019, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0340558111667633, "step": 190 }, { "calibration/aurc": 0.207823017908103, "calibration/batch_distribution_entropy": 0.9731538814379868, "calibration/buffer_distribution_entropy": 0.979395058792719, "calibration/confidence_entropy": 0.5097420144557466, "calibration/coverage@0%": 0.011067546149970225, "calibration/coverage@1%": 0.011067546149970225, "calibration/coverage@10%": 0.2447367693875681, "calibration/coverage@15%": 0.42390932553141825, "calibration/coverage@20%": 0.5590823296623799, "calibration/coverage@25%": 0.7296307994788032, "calibration/coverage@30%": 0.8170445344129554, "calibration/coverage@5%": 0.09602072282534012, "calibration/ece": 0.14993556714871595, "calibration/mean_confidence": 0.541932387510283, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004253472222222254, "completions/max_length": 3484.8, "completions/max_terminated_length": 3484.8, "completions/mean_length": 737.6642333984375, "completions/mean_terminated_length": 740.8277221679688, "completions/min_length": 0.0, "completions/min_terminated_length": 193.2, "epoch": 0.46799415007312406, "grad_norm": 0.005371047183871269, "learning_rate": 4.6875000000000004e-06, "loss": -0.0067, "num_tokens": 396875236.0, "reward": 0.9803600549697876, "reward_std": 0.12534761279821396, "rewards/accuracy_reward": 0.6599826455116272, "rewards/brier_reward": 0.8082576751708984, "rewards/confidence_uniqueness_reward": 0.9455493211746215, "rewards/format_reward": 0.9950520873069764, "rewards/frontier_coverage_0": 0.038907221704721454, "rewards/frontier_coverage_1": 0.038907221704721454, "rewards/frontier_coverage_10": 0.038907221704721454, "rewards/frontier_coverage_15": 0.04050202891230583, "rewards/frontier_coverage_20": 0.05746806710958481, "rewards/frontier_coverage_25": 0.11870489567518235, "rewards/frontier_coverage_5": 0.038907221704721454, "rewards/frontier_entropy_batch_reward": -0.27861965298652647, "signal/accuracy_reward/centered_abs_mean": 0.16305881440639497, "signal/accuracy_reward/group_std_mean": 0.21655304729938507, "signal/accuracy_reward/group_zero_std_frac": 0.38055555820465087, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9943400859832764, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08152940720319748, "signal/advantage_abs_mean": 0.7515913486480713, "signal/advantage_pre_scale_abs_mean": 0.09461424648761749, "signal/advantage_pre_scale_std": 0.14401794373989105, "signal/advantage_std": 0.9832587242126465, "signal/brier_reward/centered_abs_mean": 0.1281513586640358, "signal/brier_reward/group_std_mean": 0.1668252170085907, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1567935198545456, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012815136276185513, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022215939313173293, "signal/confidence_uniqueness_reward/group_std_mean": 0.03495916984975338, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02720388360321522, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022215940058231353, "signal/format_reward/centered_abs_mean": 0.008881293330341577, "signal/format_reward/group_std_mean": 0.018633856624364852, "signal/format_reward/group_zero_std_frac": 0.9166666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05408404804766178, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004440646665170789, "signal/frontier_coverage_0/centered_abs_mean": 0.1843973457813263, "signal/frontier_coverage_0/group_std_mean": 0.2422202616930008, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03211207017302513, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002636882150545716, "signal/frontier_coverage_1/centered_abs_mean": 0.1843973457813263, "signal/frontier_coverage_1/group_std_mean": 0.2422202616930008, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03211207017302513, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002636882150545716, "signal/frontier_coverage_10/centered_abs_mean": 0.1843973457813263, "signal/frontier_coverage_10/group_std_mean": 0.2422202616930008, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03211207017302513, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002636882150545716, "signal/frontier_coverage_15/centered_abs_mean": 0.10657975375652314, "signal/frontier_coverage_15/group_std_mean": 0.1422801896929741, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018575644865632058, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015240905107930303, "signal/frontier_coverage_20/centered_abs_mean": 0.062387507408857346, "signal/frontier_coverage_20/group_std_mean": 0.08020298928022385, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.010951629839837551, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0008921413565985859, "signal/frontier_coverage_25/centered_abs_mean": 0.09733576774597168, "signal/frontier_coverage_25/group_std_mean": 0.12596500515937806, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017124542221426963, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0013919014250859617, "signal/frontier_coverage_5/centered_abs_mean": 0.1843973457813263, "signal/frontier_coverage_5/group_std_mean": 0.2422202616930008, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03211207017302513, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002636882150545716, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3384637773036957, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40933026671409606, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41559439301490786, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03384637758135796, "step": 195 }, { "calibration/aurc": 0.13457394207174084, "calibration/batch_distribution_entropy": 0.9660387662645806, "calibration/buffer_distribution_entropy": 0.9791850307731931, "calibration/confidence_entropy": 0.49642053345282927, "calibration/coverage@0%": 0.03922876835584915, "calibration/coverage@1%": 0.03922876835584915, "calibration/coverage@10%": 0.5373616414095859, "calibration/coverage@15%": 0.6125576703375405, "calibration/coverage@20%": 0.8448078692137642, "calibration/coverage@25%": 0.9158964487671957, "calibration/coverage@30%": 0.9644429098943311, "calibration/coverage@5%": 0.21529847983954314, "calibration/ece": 0.17658624157088154, "calibration/mean_confidence": 0.5825247638644665, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00425347222222221, "completions/max_length": 3652.0, "completions/max_terminated_length": 3652.0, "completions/mean_length": 811.3713623046875, "completions/mean_terminated_length": 814.8777954101563, "completions/min_length": 0.0, "completions/min_terminated_length": 248.6, "epoch": 0.47999400007499904, "grad_norm": 0.005334607325494289, "learning_rate": 4.807692307692308e-06, "loss": 0.0016, "num_tokens": 409290042.0, "reward": 0.9891631245613098, "reward_std": 0.12703752517700195, "rewards/accuracy_reward": 0.6722222208976746, "rewards/brier_reward": 0.8120310187339783, "rewards/confidence_uniqueness_reward": 0.9465744733810425, "rewards/format_reward": 0.9953993082046508, "rewards/frontier_coverage_0": 0.04383779689669609, "rewards/frontier_coverage_1": 0.04383779689669609, "rewards/frontier_coverage_10": 0.043836929649114606, "rewards/frontier_coverage_15": 0.04154842011630535, "rewards/frontier_coverage_20": 0.07352328151464463, "rewards/frontier_coverage_25": 0.144034680724144, "rewards/frontier_coverage_5": 0.04383779689669609, "rewards/frontier_entropy_batch_reward": -0.26720958948135376, "signal/accuracy_reward/centered_abs_mean": 0.17158203125, "signal/accuracy_reward/group_std_mean": 0.22611615359783171, "signal/accuracy_reward/group_zero_std_frac": 0.35833333134651185, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0878794550895692, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.085791015625, "signal/advantage_abs_mean": 0.7504582166671753, "signal/advantage_pre_scale_abs_mean": 0.09581226408481598, "signal/advantage_pre_scale_std": 0.14724062085151673, "signal/advantage_std": 0.9832112431526184, "signal/brier_reward/centered_abs_mean": 0.129945769906044, "signal/brier_reward/group_std_mean": 0.1687883585691452, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16505386531352997, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012994576990604401, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020943275094032286, "signal/confidence_uniqueness_reward/group_std_mean": 0.03341059945523739, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026448329165577888, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002094327541999519, "signal/format_reward/centered_abs_mean": 0.008273654524236918, "signal/format_reward/group_std_mean": 0.017920159548521043, "signal/format_reward/group_zero_std_frac": 0.9166666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.051681911945343016, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004136827262118459, "signal/frontier_coverage_0/centered_abs_mean": 0.19716570079326629, "signal/frontier_coverage_0/group_std_mean": 0.25832314491271974, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035849443450570104, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0028194695245474578, "signal/frontier_coverage_1/centered_abs_mean": 0.19716570079326629, "signal/frontier_coverage_1/group_std_mean": 0.25832314491271974, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035849443450570104, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0028194695245474578, "signal/frontier_coverage_10/centered_abs_mean": 0.197145015001297, "signal/frontier_coverage_10/group_std_mean": 0.25829660296440127, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.035845917835831645, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00281917373649776, "signal/frontier_coverage_15/centered_abs_mean": 0.09588005095720291, "signal/frontier_coverage_15/group_std_mean": 0.126824252307415, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01748826839029789, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013710847357288002, "signal/frontier_coverage_20/centered_abs_mean": 0.06827102303504944, "signal/frontier_coverage_20/group_std_mean": 0.08614148795604706, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012408490851521492, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0009762756177224219, "signal/frontier_coverage_25/centered_abs_mean": 0.10345425456762314, "signal/frontier_coverage_25/group_std_mean": 0.1329213485121727, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018771519511938096, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0014793958282098175, "signal/frontier_coverage_5/centered_abs_mean": 0.19716570079326629, "signal/frontier_coverage_5/group_std_mean": 0.25832314491271974, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035849443450570104, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0028194695245474578, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3215024173259735, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3918437123298645, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40881314873695374, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03215024285018444, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.12598091095806183, "eval_calibration/batch_distribution_entropy": 0.9433110585373359, "eval_calibration/buffer_distribution_entropy": 0.9790980924717448, "eval_calibration/confidence_entropy": 0.47056662694642887, "eval_calibration/coverage@0%": 0.2814180107526882, "eval_calibration/coverage@1%": 0.2814180107526882, "eval_calibration/coverage@10%": 0.5105846774193549, "eval_calibration/coverage@15%": 0.6851478494623656, "eval_calibration/coverage@20%": 0.8158602150537634, "eval_calibration/coverage@25%": 0.8785282258064516, "eval_calibration/coverage@30%": 0.9680779569892474, "eval_calibration/coverage@5%": 0.3022513440860215, "eval_calibration/ece": 0.19921678024193548, "eval_calibration/mean_confidence": 0.5472353030913978, "eval_completions/clipped_ratio": 0.004340277777777772, "eval_completions/max_length": 2724.8333333333335, "eval_completions/max_terminated_length": 2724.8333333333335, "eval_completions/mean_length": 862.3639221191406, "eval_completions/mean_terminated_length": 866.1793721516927, "eval_completions/min_length": 175.5, "eval_completions/min_terminated_length": 317.0, "eval_loss": 0.0, "eval_num_tokens": 409290042.0, "eval_reward": 0.9056529303391775, "eval_reward_std": 0.2260978470245997, "eval_rewards/accuracy_reward": 0.6614583333333334, "eval_rewards/brier_reward": 0.8041390081246694, "eval_rewards/confidence_uniqueness_reward": 0.8975274364153544, "eval_rewards/format_reward": 0.995659718910853, "eval_rewards/frontier_coverage_0": 0.046538424057265125, "eval_rewards/frontier_coverage_1": 0.046538424057265125, "eval_rewards/frontier_coverage_10": 0.046553870352605976, "eval_rewards/frontier_coverage_15": 0.04254821936289469, "eval_rewards/frontier_coverage_20": 0.07779269541303317, "eval_rewards/frontier_coverage_25": 0.14755996068318686, "eval_rewards/frontier_coverage_5": 0.046538424057265125, "eval_rewards/frontier_entropy_batch_reward": -0.995659718910853, "eval_runtime": 176.4738, "eval_samples_per_second": 5.667, "eval_signal/accuracy_reward/centered_abs_mean": 0.4331597238779068, "eval_signal/accuracy_reward/group_std_mean": 0.47175751626491547, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9675879975159963, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2165798619389534, "eval_signal/advantage_abs_mean": 0.8864832321802775, "eval_signal/advantage_pre_scale_abs_mean": 0.20117887606223425, "eval_signal/advantage_pre_scale_std": 0.22427177677551904, "eval_signal/advantage_std": 0.9863830308119456, "eval_signal/brier_reward/centered_abs_mean": 0.1935593287150065, "eval_signal/brier_reward/group_std_mean": 0.24976551036039987, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08630472545822461, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019355932716280222, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04233323782682419, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.0602133646607399, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01887299648175637, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042333238913367195, "eval_signal/format_reward/centered_abs_mean": 0.008409287935743729, "eval_signal/format_reward/group_std_mean": 0.02455231888840596, "eval_signal/format_reward/group_zero_std_frac": 0.8611111243565878, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018258365492026012, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.004204643967871864, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.331520880262057, "eval_signal/frontier_coverage_0/group_std_mean": 0.4429255078236262, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.021158167781929176, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004740748554468155, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.331520880262057, "eval_signal/frontier_coverage_1/group_std_mean": 0.4429255078236262, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.021158167781929176, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004740748554468155, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.32940207918485004, "eval_signal/frontier_coverage_10/group_std_mean": 0.4403219074010849, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0210230794424812, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00471044968192776, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.12829044088721275, "eval_signal/frontier_coverage_15/group_std_mean": 0.18191451330979666, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.008187860560913881, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018345532977643113, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.10106631244222324, "eval_signal/frontier_coverage_20/group_std_mean": 0.12813800697525343, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.006464191324387987, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014452482379662495, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.19661433746417364, "eval_signal/frontier_coverage_25/group_std_mean": 0.2403616358836492, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.012586226065953573, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0028115849321087203, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.331520880262057, "eval_signal/frontier_coverage_5/group_std_mean": 0.4429255078236262, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.021158167781929176, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004740748554468155, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008409287935743729, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.02455231888840596, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8611111243565878, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0036516734398901463, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008409288129769266, "eval_steps_per_second": 0.034, "step": 200 }, { "epoch": 0.47999400007499904, "step": 200, "train_probe_calibration/aurc": 0.13770699054811322, "train_probe_calibration/batch_distribution_entropy": 0.905888972116789, "train_probe_calibration/buffer_distribution_entropy": 0.9790985272707301, "train_probe_calibration/confidence_entropy": 0.4350313484976907, "train_probe_calibration/coverage@0%": 0.21404569892473116, "train_probe_calibration/coverage@1%": 0.21404569892473116, "train_probe_calibration/coverage@10%": 0.5129368279569892, "train_probe_calibration/coverage@15%": 0.6231518817204301, "train_probe_calibration/coverage@20%": 0.7699932795698925, "train_probe_calibration/coverage@25%": 0.8850806451612904, "train_probe_calibration/coverage@30%": 0.96875, "train_probe_calibration/coverage@5%": 0.21404569892473116, "train_probe_calibration/ece": 0.21378480174731182, "train_probe_calibration/mean_confidence": 0.5547951041666667, "train_probe_completions/clipped_ratio": 0.006076388888888895, "train_probe_completions/max_length": 3044.8333333333335, "train_probe_completions/max_terminated_length": 3044.8333333333335, "train_probe_completions/mean_length": 896.2515767415365, "train_probe_completions/mean_terminated_length": 901.6541951497396, "train_probe_completions/min_length": 72.66666666666667, "train_probe_completions/min_terminated_length": 322.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 409290042.0, "train_probe_reward": 0.9188018341859182, "train_probe_reward_std": 0.22796061635017395, "train_probe_rewards/accuracy_reward": 0.6901041567325592, "train_probe_rewards/brier_reward": 0.8122913241386414, "train_probe_rewards/confidence_uniqueness_reward": 0.8891710241635641, "train_probe_rewards/format_reward": 0.9930555621782938, "train_probe_rewards/frontier_coverage_0": 0.03560524402807156, "train_probe_rewards/frontier_coverage_1": 0.03560524402807156, "train_probe_rewards/frontier_coverage_10": 0.03576213649163643, "train_probe_rewards/frontier_coverage_15": 0.043352426340182625, "train_probe_rewards/frontier_coverage_20": 0.08913688485821088, "train_probe_rewards/frontier_coverage_25": 0.17117570588986078, "train_probe_rewards/frontier_coverage_5": 0.03560524402807156, "train_probe_rewards/frontier_entropy_batch_reward": -0.9930555621782938, "train_probe_runtime": 200.3449, "train_probe_samples_per_second": 4.991, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4161783804496129, "train_probe_signal/accuracy_reward/group_std_mean": 0.46240218977133435, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9225195546944936, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20808919022480646, "train_probe_signal/advantage_abs_mean": 0.8652854164441427, "train_probe_signal/advantage_pre_scale_abs_mean": 0.19722345719734827, "train_probe_signal/advantage_pre_scale_std": 0.22617560625076294, "train_probe_signal/advantage_std": 0.9863866766293844, "train_probe_signal/brier_reward/centered_abs_mean": 0.19129946579535803, "train_probe_signal/brier_reward/group_std_mean": 0.2488196368018786, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08478038261334102, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01912994698310892, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04761647308866183, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.07206781022250652, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021060552758475144, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004761647588262956, "train_probe_signal/format_reward/centered_abs_mean": 0.013346354011446238, "train_probe_signal/format_reward/group_std_mean": 0.0362943010404706, "train_probe_signal/format_reward/group_zero_std_frac": 0.8055555820465088, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.029019565011064213, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.006673177005723119, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.30989480515321094, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.42381706337134045, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.019649510582288105, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004431495947452883, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.30989480515321094, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.42381706337134045, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.019649510582288105, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004431495947452883, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.30765336255232495, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.42102983097235364, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019507159168521564, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.004399443161673844, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11950497577587764, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.17058095087607703, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007572836941108108, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017089210644674797, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.10610838606953621, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.13226349900166193, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00672729096064965, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015173499123193324, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.20468894888957342, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.2476311499873797, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01297900810216864, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00292705197352916, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30989480515321094, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.42381706337134045, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019649510582288105, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004431495947452883, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013346354011446238, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0362943010404706, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8055555820465088, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005803913033256928, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013346354632327955, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.12065447205364044, "calibration/batch_distribution_entropy": 0.9478632617089037, "calibration/buffer_distribution_entropy": 0.9794554977649417, "calibration/confidence_entropy": 0.47588792603618596, "calibration/coverage@0%": 0.038099784583908614, "calibration/coverage@1%": 0.038099784583908614, "calibration/coverage@10%": 0.4993634915861277, "calibration/coverage@15%": 0.7511866752445787, "calibration/coverage@20%": 0.8759859472612195, "calibration/coverage@25%": 0.9167785326644158, "calibration/coverage@30%": 0.9570680628272251, "calibration/coverage@5%": 0.22011283998377826, "calibration/ece": 0.12989965288014366, "calibration/mean_confidence": 0.6225485873164307, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008159722222222231, "completions/max_length": 3735.0, "completions/max_terminated_length": 3735.0, "completions/mean_length": 840.884716796875, "completions/mean_terminated_length": 847.7636108398438, "completions/min_length": 0.0, "completions/min_terminated_length": 228.8, "epoch": 0.491993850076874, "grad_norm": 0.005042714532464743, "learning_rate": 4.927884615384616e-06, "loss": -0.0202, "num_tokens": 422042986.0, "reward": 1.000865375995636, "reward_std": 0.13984827995300292, "rewards/accuracy_reward": 0.706250011920929, "rewards/brier_reward": 0.8168227672576904, "rewards/confidence_uniqueness_reward": 0.9406297087669373, "rewards/format_reward": 0.9916666865348815, "rewards/frontier_coverage_0": 0.02166607202962041, "rewards/frontier_coverage_1": 0.02166607202962041, "rewards/frontier_coverage_10": 0.022014103550463914, "rewards/frontier_coverage_15": 0.040804407000541686, "rewards/frontier_coverage_20": 0.09391007274389267, "rewards/frontier_coverage_25": 0.1805516004562378, "rewards/frontier_coverage_5": 0.02166607202962041, "rewards/frontier_entropy_batch_reward": -0.2959077060222626, "signal/accuracy_reward/centered_abs_mean": 0.18302951455116273, "signal/accuracy_reward/group_std_mean": 0.24282491207122803, "signal/accuracy_reward/group_zero_std_frac": 0.3083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0774194717407226, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09151475727558137, "signal/advantage_abs_mean": 0.7390522360801697, "signal/advantage_pre_scale_abs_mean": 0.10357871353626251, "signal/advantage_pre_scale_std": 0.16100256741046906, "signal/advantage_std": 0.9833029270172119, "signal/brier_reward/centered_abs_mean": 0.130741947889328, "signal/brier_reward/group_std_mean": 0.17220645546913146, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15355044603347778, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013074194081127644, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02761349529027939, "signal/confidence_uniqueness_reward/group_std_mean": 0.04560641422867775, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032353409379720685, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027613495476543903, "signal/format_reward/centered_abs_mean": 0.014680989645421505, "signal/format_reward/group_std_mean": 0.03030678890645504, "signal/format_reward/group_zero_std_frac": 0.8638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0853647917509079, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007340494822710752, "signal/frontier_coverage_0/centered_abs_mean": 0.18313942551612855, "signal/frontier_coverage_0/group_std_mean": 0.24227609634399414, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030802012979984285, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026188937481492758, "signal/frontier_coverage_1/centered_abs_mean": 0.18313942551612855, "signal/frontier_coverage_1/group_std_mean": 0.24227609634399414, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030802012979984285, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026188937481492758, "signal/frontier_coverage_10/centered_abs_mean": 0.18102456629276276, "signal/frontier_coverage_10/group_std_mean": 0.23960494697093965, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.030447249487042426, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0025886511895805596, "signal/frontier_coverage_15/centered_abs_mean": 0.07476659417152405, "signal/frontier_coverage_15/group_std_mean": 0.0987214908003807, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012579312175512313, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010691623203456402, "signal/frontier_coverage_20/centered_abs_mean": 0.0753616064786911, "signal/frontier_coverage_20/group_std_mean": 0.09655838012695313, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012687078863382339, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0010776709532365203, "signal/frontier_coverage_25/centered_abs_mean": 0.11949286609888077, "signal/frontier_coverage_25/group_std_mean": 0.15502755641937255, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.020129023864865304, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017087480053305625, "signal/frontier_coverage_5/centered_abs_mean": 0.18313942551612855, "signal/frontier_coverage_5/group_std_mean": 0.24227609634399414, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030802012979984285, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026188937481492758, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3225748658180237, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3920146644115448, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38031149506568906, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03225748799741268, "step": 205 }, { "calibration/aurc": 0.1283698895549012, "calibration/batch_distribution_entropy": 0.9059175239789449, "calibration/buffer_distribution_entropy": 0.9786380154140943, "calibration/confidence_entropy": 0.511529493161178, "calibration/coverage@0%": 0.0743731477469488, "calibration/coverage@1%": 0.07960874984118964, "calibration/coverage@10%": 0.45535827048147715, "calibration/coverage@15%": 0.6292367935026641, "calibration/coverage@20%": 0.8990382170848402, "calibration/coverage@25%": 0.9410870313292656, "calibration/coverage@30%": 0.9687391209747606, "calibration/coverage@5%": 0.20523881659123244, "calibration/ece": 0.1291991111896027, "calibration/mean_confidence": 0.6613698091778775, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006336805555555536, "completions/max_length": 3717.2, "completions/max_terminated_length": 3717.2, "completions/mean_length": 741.9556518554688, "completions/mean_terminated_length": 746.6578857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 186.8, "epoch": 0.503993700078749, "grad_norm": 0.003985654562711716, "learning_rate": 4.987980769230769e-06, "loss": -0.0183, "num_tokens": 433706155.0, "reward": 0.9879561424255371, "reward_std": 0.1374760627746582, "rewards/accuracy_reward": 0.6838541626930237, "rewards/brier_reward": 0.828122079372406, "rewards/confidence_uniqueness_reward": 0.9403419256210327, "rewards/format_reward": 0.9931423664093018, "rewards/frontier_coverage_0": 0.0342383430339396, "rewards/frontier_coverage_1": 0.0342383430339396, "rewards/frontier_coverage_10": 0.0340392192825675, "rewards/frontier_coverage_15": 0.03967601284384727, "rewards/frontier_coverage_20": 0.08980410993099212, "rewards/frontier_coverage_25": 0.1722535938024521, "rewards/frontier_coverage_5": 0.0342383430339396, "rewards/frontier_entropy_batch_reward": -0.3365890234708786, "signal/accuracy_reward/centered_abs_mean": 0.17376301884651185, "signal/accuracy_reward/group_std_mean": 0.22921195328235627, "signal/accuracy_reward/group_zero_std_frac": 0.347222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0327161431312561, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08688150942325593, "signal/advantage_abs_mean": 0.7446362137794494, "signal/advantage_pre_scale_abs_mean": 0.10320448130369186, "signal/advantage_pre_scale_std": 0.1599856436252594, "signal/advantage_std": 0.9832797527313233, "signal/brier_reward/centered_abs_mean": 0.12006305009126664, "signal/brier_reward/group_std_mean": 0.15930896997451782, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14385341256856918, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012006304785609245, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024894104152917863, "signal/confidence_uniqueness_reward/group_std_mean": 0.04236802905797958, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03055493049323559, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024894104804843663, "signal/format_reward/centered_abs_mean": 0.01252712681889534, "signal/format_reward/group_std_mean": 0.027480727061629295, "signal/format_reward/group_zero_std_frac": 0.8722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07819846607744693, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00626356340944767, "signal/frontier_coverage_0/centered_abs_mean": 0.15801767706871034, "signal/frontier_coverage_0/group_std_mean": 0.20844950377941132, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.026977039873600006, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002259652712382376, "signal/frontier_coverage_1/centered_abs_mean": 0.15801767706871034, "signal/frontier_coverage_1/group_std_mean": 0.20844950377941132, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.026977039873600006, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002259652712382376, "signal/frontier_coverage_10/centered_abs_mean": 0.15202154815196992, "signal/frontier_coverage_10/group_std_mean": 0.20094724893569946, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02595903053879738, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00217390819452703, "signal/frontier_coverage_15/centered_abs_mean": 0.05612077414989471, "signal/frontier_coverage_15/group_std_mean": 0.07349130362272263, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.009616562630981207, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008025271003134548, "signal/frontier_coverage_20/centered_abs_mean": 0.07690738439559937, "signal/frontier_coverage_20/group_std_mean": 0.09875056445598603, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013239230774343014, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001099775591865182, "signal/frontier_coverage_25/centered_abs_mean": 0.12822509557008743, "signal/frontier_coverage_25/group_std_mean": 0.16526393294334413, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02204398587346077, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018336188746616243, "signal/frontier_coverage_5/centered_abs_mean": 0.15801767706871034, "signal/frontier_coverage_5/group_std_mean": 0.20844950377941132, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026977039873600006, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002259652712382376, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.343439394235611, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41007362604141234, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4145658850669861, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03434394001960754, "step": 210 }, { "calibration/aurc": 0.14320745326360448, "calibration/batch_distribution_entropy": 0.9150604212103024, "calibration/buffer_distribution_entropy": 0.9777807684637884, "calibration/confidence_entropy": 0.47859022632943315, "calibration/coverage@0%": 0.10660243473326474, "calibration/coverage@1%": 0.1176842289285154, "calibration/coverage@10%": 0.24826614573394507, "calibration/coverage@15%": 0.6604307322085081, "calibration/coverage@20%": 0.8212603585921796, "calibration/coverage@25%": 0.932620320855615, "calibration/coverage@30%": 0.9577540106951872, "calibration/coverage@5%": 0.18734122101294812, "calibration/ece": 0.17545954965586857, "calibration/mean_confidence": 0.6471332423964474, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014236111111111116, "completions/max_length": 3788.4, "completions/max_terminated_length": 3788.4, "completions/mean_length": 807.5797729492188, "completions/mean_terminated_length": 819.5307006835938, "completions/min_length": 0.0, "completions/min_terminated_length": 148.8, "epoch": 0.515993550080624, "grad_norm": 0.003014960326254368, "learning_rate": 4.957932692307692e-06, "loss": -0.0452, "num_tokens": 446088290.0, "reward": 0.9924614787101745, "reward_std": 0.13940343707799913, "rewards/accuracy_reward": 0.70390625, "rewards/brier_reward": 0.8037495970726013, "rewards/confidence_uniqueness_reward": 0.9330443620681763, "rewards/format_reward": 0.9836805462837219, "rewards/frontier_coverage_0": 0.013330519822193309, "rewards/frontier_coverage_1": 0.013330519822193309, "rewards/frontier_coverage_10": 0.01646778262220323, "rewards/frontier_coverage_15": 0.04249517768621445, "rewards/frontier_coverage_20": 0.09611388593912125, "rewards/frontier_coverage_25": 0.17800917625427246, "rewards/frontier_coverage_5": 0.013330519822193309, "rewards/frontier_entropy_batch_reward": -0.3034632682800293, "signal/accuracy_reward/centered_abs_mean": 0.149462890625, "signal/accuracy_reward/group_std_mean": 0.2026258021593094, "signal/accuracy_reward/group_zero_std_frac": 0.3944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9496671915054321, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0747314453125, "signal/advantage_abs_mean": 0.7293966054916382, "signal/advantage_pre_scale_abs_mean": 0.09994653314352035, "signal/advantage_pre_scale_std": 0.1655849814414978, "signal/advantage_std": 0.9832081437110901, "signal/brier_reward/centered_abs_mean": 0.13938885033130646, "signal/brier_reward/group_std_mean": 0.1821478396654129, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17650045454502106, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013938885927200318, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03878090418875217, "signal/confidence_uniqueness_reward/group_std_mean": 0.06749042719602585, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04819239303469658, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003878090484067798, "signal/format_reward/centered_abs_mean": 0.02853732667863369, "signal/format_reward/group_std_mean": 0.055648359656333926, "signal/format_reward/group_zero_std_frac": 0.7666666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17448110282421112, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014268663339316845, "signal/frontier_coverage_0/centered_abs_mean": 0.17390457689762115, "signal/frontier_coverage_0/group_std_mean": 0.22902662456035613, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03165303654968739, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024868354201316835, "signal/frontier_coverage_1/centered_abs_mean": 0.17390457689762115, "signal/frontier_coverage_1/group_std_mean": 0.22902662456035613, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03165303654968739, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024868354201316835, "signal/frontier_coverage_10/centered_abs_mean": 0.16191380023956298, "signal/frontier_coverage_10/group_std_mean": 0.21419888734817505, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02945178672671318, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023153672926127912, "signal/frontier_coverage_15/centered_abs_mean": 0.06422688812017441, "signal/frontier_coverage_15/group_std_mean": 0.08302107304334641, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011686071194708348, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009184445254504681, "signal/frontier_coverage_20/centered_abs_mean": 0.08474169373512268, "signal/frontier_coverage_20/group_std_mean": 0.1076007753610611, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015469780191779137, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012118062004446983, "signal/frontier_coverage_25/centered_abs_mean": 0.13222533762454985, "signal/frontier_coverage_25/group_std_mean": 0.1688993453979492, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0241268590092659, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018908223137259483, "signal/frontier_coverage_5/centered_abs_mean": 0.17390457689762115, "signal/frontier_coverage_5/group_std_mean": 0.22902662456035613, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03165303654968739, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024868354201316835, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.331625634431839, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4007671117782593, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42386093735694885, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03316256329417229, "step": 215 }, { "calibration/aurc": 0.20061380169228143, "calibration/batch_distribution_entropy": 0.9799272529009608, "calibration/buffer_distribution_entropy": 0.9768673721994799, "calibration/confidence_entropy": 0.4889103468023185, "calibration/coverage@0%": 0.012115919594949408, "calibration/coverage@1%": 0.012115919594949408, "calibration/coverage@10%": 0.24258528536071894, "calibration/coverage@15%": 0.46005582609368306, "calibration/coverage@20%": 0.5882172287298035, "calibration/coverage@25%": 0.6680374267611943, "calibration/coverage@30%": 0.8304669701684627, "calibration/coverage@5%": 0.056363151901883214, "calibration/ece": 0.14497071478111592, "calibration/mean_confidence": 0.5415289580677148, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.060416666666666674, "completions/max_length": 3898.4, "completions/max_terminated_length": 3898.4, "completions/mean_length": 804.4072998046875, "completions/mean_terminated_length": 855.8221435546875, "completions/min_length": 0.0, "completions/min_terminated_length": 230.2, "epoch": 0.527993400082499, "grad_norm": 0.002567050512880087, "learning_rate": 4.927884615384616e-06, "loss": -0.142, "num_tokens": 458440694.0, "reward": 0.956389057636261, "reward_std": 0.1825144648551941, "rewards/accuracy_reward": 0.6917534828186035, "rewards/brier_reward": 0.7530406594276429, "rewards/confidence_uniqueness_reward": 0.8914730548858643, "rewards/format_reward": 0.9363715291023255, "rewards/frontier_coverage_0": -0.014244955778121949, "rewards/frontier_coverage_1": -0.014244955778121949, "rewards/frontier_coverage_10": -0.009593733958899975, "rewards/frontier_coverage_15": 0.036306874454021455, "rewards/frontier_coverage_20": 0.09199995398521424, "rewards/frontier_coverage_25": 0.17109810411930085, "rewards/frontier_coverage_5": -0.014244955778121949, "rewards/frontier_entropy_batch_reward": -0.2565806359052658, "signal/accuracy_reward/centered_abs_mean": 0.14844292402267456, "signal/accuracy_reward/group_std_mean": 0.2044283628463745, "signal/accuracy_reward/group_zero_std_frac": 0.3888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8140737652778626, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07422146201133728, "signal/advantage_abs_mean": 0.7192968487739563, "signal/advantage_pre_scale_abs_mean": 0.13045098185539244, "signal/advantage_pre_scale_std": 0.22229794263839722, "signal/advantage_std": 0.9833795070648194, "signal/brier_reward/centered_abs_mean": 0.1779948115348816, "signal/brier_reward/group_std_mean": 0.22492851316928864, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1958990842103958, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.017799481749534607, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09162591844797134, "signal/confidence_uniqueness_reward/group_std_mean": 0.13799885660409927, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.10163218230009079, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009162592049688102, "signal/format_reward/centered_abs_mean": 0.0875271275639534, "signal/format_reward/group_std_mean": 0.13421101570129396, "signal/format_reward/group_zero_std_frac": 0.5388889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.48496673703193666, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0437635637819767, "signal/frontier_coverage_0/centered_abs_mean": 0.19460634887218475, "signal/frontier_coverage_0/group_std_mean": 0.2550558179616928, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03059198223054409, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027828707825392485, "signal/frontier_coverage_1/centered_abs_mean": 0.19460634887218475, "signal/frontier_coverage_1/group_std_mean": 0.2550558179616928, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03059198223054409, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027828707825392485, "signal/frontier_coverage_10/centered_abs_mean": 0.17800663709640502, "signal/frontier_coverage_10/group_std_mean": 0.23399430215358735, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02799622118473053, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002545494958758354, "signal/frontier_coverage_15/centered_abs_mean": 0.06852128356695175, "signal/frontier_coverage_15/group_std_mean": 0.08859587162733078, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010821715742349625, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.000979854364413768, "signal/frontier_coverage_20/centered_abs_mean": 0.08110383749008179, "signal/frontier_coverage_20/group_std_mean": 0.10272217839956284, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012808217480778695, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011597848031669855, "signal/frontier_coverage_25/centered_abs_mean": 0.12214765101671218, "signal/frontier_coverage_25/group_std_mean": 0.15550835728645324, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.019256106950342654, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001746711414307356, "signal/frontier_coverage_5/centered_abs_mean": 0.19460634887218475, "signal/frontier_coverage_5/group_std_mean": 0.2550558179616928, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03059198223054409, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027828707825392485, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31999850273132324, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3911717176437378, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3527726888656616, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03199984952807426, "step": 220 }, { "calibration/aurc": 0.11836046230203076, "calibration/batch_distribution_entropy": 0.9299152762651394, "calibration/buffer_distribution_entropy": 0.9769273121128415, "calibration/confidence_entropy": 0.48964527661178403, "calibration/coverage@0%": 0.04429167266508173, "calibration/coverage@1%": 0.04429167266508173, "calibration/coverage@10%": 0.6058377209623409, "calibration/coverage@15%": 0.7308888548404051, "calibration/coverage@20%": 0.8211582774335604, "calibration/coverage@25%": 0.8667417686517487, "calibration/coverage@30%": 0.8987181456637139, "calibration/coverage@5%": 0.37037933609710344, "calibration/ece": 0.1627648079617419, "calibration/mean_confidence": 0.6129012228374939, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05642361111111112, "completions/max_length": 3970.6, "completions/max_terminated_length": 3970.6, "completions/mean_length": 854.6477661132812, "completions/mean_terminated_length": 906.0167846679688, "completions/min_length": 0.0, "completions/min_terminated_length": 242.6, "epoch": 0.5399932500843739, "grad_norm": 0.0025974006857722998, "learning_rate": 4.897836538461539e-06, "loss": -0.1382, "num_tokens": 471414428.0, "reward": 0.9573588132858276, "reward_std": 0.17803834080696107, "rewards/accuracy_reward": 0.6902777791023255, "rewards/brier_reward": 0.7832067370414734, "rewards/confidence_uniqueness_reward": 0.8929091334342957, "rewards/format_reward": 0.9434895992279053, "rewards/frontier_coverage_0": 0.009216944687068462, "rewards/frontier_coverage_1": 0.009216944687068462, "rewards/frontier_coverage_10": 0.012849159445613623, "rewards/frontier_coverage_15": 0.047060129791498186, "rewards/frontier_coverage_20": 0.11295257806777954, "rewards/frontier_coverage_25": 0.20341356098651886, "rewards/frontier_coverage_5": 0.009216944687068462, "rewards/frontier_entropy_batch_reward": -0.3291258454322815, "signal/accuracy_reward/centered_abs_mean": 0.1601019948720932, "signal/accuracy_reward/group_std_mean": 0.20708029568195344, "signal/accuracy_reward/group_zero_std_frac": 0.425, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9800410747528077, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0800509974360466, "signal/advantage_abs_mean": 0.7600647449493408, "signal/advantage_pre_scale_abs_mean": 0.1340820536017418, "signal/advantage_pre_scale_std": 0.22110334038734436, "signal/advantage_std": 0.9832608580589295, "signal/brier_reward/centered_abs_mean": 0.15971020460128785, "signal/brier_reward/group_std_mean": 0.20280967950820922, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19527204036712648, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015971020981669425, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08719486072659492, "signal/confidence_uniqueness_reward/group_std_mean": 0.12454380840063095, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.10635674297809601, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008719485811889171, "signal/format_reward/centered_abs_mean": 0.080126953125, "signal/format_reward/group_std_mean": 0.116636623442173, "signal/format_reward/group_zero_std_frac": 0.6305555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.4883050560951233, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0400634765625, "signal/frontier_coverage_0/centered_abs_mean": 0.1580364465713501, "signal/frontier_coverage_0/group_std_mean": 0.20654098987579345, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02764430344104767, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022599212359637024, "signal/frontier_coverage_1/centered_abs_mean": 0.1580364465713501, "signal/frontier_coverage_1/group_std_mean": 0.20654098987579345, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02764430344104767, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022599212359637024, "signal/frontier_coverage_10/centered_abs_mean": 0.1387157380580902, "signal/frontier_coverage_10/group_std_mean": 0.1824465125799179, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.024255484342575073, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019836350344121455, "signal/frontier_coverage_15/centered_abs_mean": 0.061208389699459076, "signal/frontier_coverage_15/group_std_mean": 0.07726839333772659, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010705550946295262, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008752800291404128, "signal/frontier_coverage_20/centered_abs_mean": 0.09257221668958664, "signal/frontier_coverage_20/group_std_mean": 0.11682336181402206, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016201268322765826, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013237826991826297, "signal/frontier_coverage_25/centered_abs_mean": 0.14442039132118226, "signal/frontier_coverage_25/group_std_mean": 0.18332480192184447, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02527971677482128, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020652116276323796, "signal/frontier_coverage_5/centered_abs_mean": 0.1580364465713501, "signal/frontier_coverage_5/group_std_mean": 0.20654098987579345, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02764430344104767, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022599212359637024, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3387024819850922, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40370280146598814, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4145235657691956, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03387024849653244, "step": 225 }, { "calibration/aurc": 0.1259500554224069, "calibration/batch_distribution_entropy": 0.9646647743133172, "calibration/buffer_distribution_entropy": 0.9760416707390451, "calibration/confidence_entropy": 0.5170283098740476, "calibration/coverage@0%": 0.1321393950216345, "calibration/coverage@1%": 0.14262021842656228, "calibration/coverage@10%": 0.49291161070043243, "calibration/coverage@15%": 0.6123658945814797, "calibration/coverage@20%": 0.8416557899316519, "calibration/coverage@25%": 0.8898460415701794, "calibration/coverage@30%": 0.9226909744151124, "calibration/coverage@5%": 0.37023242162917447, "calibration/ece": 0.17620611336596587, "calibration/mean_confidence": 0.5703640132680619, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01987847222222221, "completions/max_length": 3884.8, "completions/max_terminated_length": 3884.8, "completions/mean_length": 895.6581665039063, "completions/mean_terminated_length": 914.0191528320313, "completions/min_length": 0.0, "completions/min_terminated_length": 279.4, "epoch": 0.5519931000862489, "grad_norm": 0.002822543727234006, "learning_rate": 4.867788461538462e-06, "loss": -0.0525, "num_tokens": 484813178.0, "reward": 0.9878373265266418, "reward_std": 0.13694891929626465, "rewards/accuracy_reward": 0.6951388955116272, "rewards/brier_reward": 0.7968821167945862, "rewards/confidence_uniqueness_reward": 0.9321338534355164, "rewards/format_reward": 0.9801215291023254, "rewards/frontier_coverage_0": 0.00919010564684868, "rewards/frontier_coverage_1": 0.00919010564684868, "rewards/frontier_coverage_10": 0.013235241547226906, "rewards/frontier_coverage_15": 0.04320261515676975, "rewards/frontier_coverage_20": 0.0952497273683548, "rewards/frontier_coverage_25": 0.17420322000980376, "rewards/frontier_coverage_5": 0.00919010564684868, "rewards/frontier_entropy_batch_reward": -0.2774901568889618, "signal/accuracy_reward/centered_abs_mean": 0.14523654580116271, "signal/accuracy_reward/group_std_mean": 0.19628881812095642, "signal/accuracy_reward/group_zero_std_frac": 0.4250000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9387128591537476, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07261827290058136, "signal/advantage_abs_mean": 0.737873125076294, "signal/advantage_pre_scale_abs_mean": 0.09995074719190597, "signal/advantage_pre_scale_std": 0.16725102066993713, "signal/advantage_std": 0.9831852674484253, "signal/brier_reward/centered_abs_mean": 0.13690456748008728, "signal/brier_reward/group_std_mean": 0.17525491416454314, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17712121903896333, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013690456189215184, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04152504913508892, "signal/confidence_uniqueness_reward/group_std_mean": 0.06767643317580223, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05302448347210884, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004152504866942764, "signal/format_reward/centered_abs_mean": 0.03237304650247097, "signal/format_reward/group_std_mean": 0.05700000524520874, "signal/format_reward/group_zero_std_frac": 0.7750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2050231069326401, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016186523251235485, "signal/frontier_coverage_0/centered_abs_mean": 0.18283499777317047, "signal/frontier_coverage_0/group_std_mean": 0.23812003433704376, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03383407108485699, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026145403273403643, "signal/frontier_coverage_1/centered_abs_mean": 0.18283499777317047, "signal/frontier_coverage_1/group_std_mean": 0.23812003433704376, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03383407108485699, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026145403273403643, "signal/frontier_coverage_10/centered_abs_mean": 0.1571557939052582, "signal/frontier_coverage_10/group_std_mean": 0.20568057000637055, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02907021902501583, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0022473279386758804, "signal/frontier_coverage_15/centered_abs_mean": 0.05910146087408066, "signal/frontier_coverage_15/group_std_mean": 0.0758358508348465, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011005043797194958, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008451508358120919, "signal/frontier_coverage_20/centered_abs_mean": 0.07917019873857498, "signal/frontier_coverage_20/group_std_mean": 0.10083940923213959, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014786782115697861, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001132133835926652, "signal/frontier_coverage_25/centered_abs_mean": 0.12114208936691284, "signal/frontier_coverage_25/group_std_mean": 0.15490374565124512, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02259993925690651, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017323318403214215, "signal/frontier_coverage_5/centered_abs_mean": 0.18283499777317047, "signal/frontier_coverage_5/group_std_mean": 0.23812003433704376, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03383407108485699, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026145403273403643, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32804338335990907, "signal/frontier_entropy_batch_reward/group_std_mean": 0.396223646402359, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42523607015609743, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03280433937907219, "step": 230 }, { "calibration/aurc": 0.17411097994690442, "calibration/batch_distribution_entropy": 0.9419306618782685, "calibration/buffer_distribution_entropy": 0.9762051558491983, "calibration/confidence_entropy": 0.459608762995326, "calibration/coverage@0%": 0.04538488280031869, "calibration/coverage@1%": 0.04538488280031869, "calibration/coverage@10%": 0.43346879259359766, "calibration/coverage@15%": 0.5383327090819566, "calibration/coverage@20%": 0.7157446177083908, "calibration/coverage@25%": 0.7734377818017153, "calibration/coverage@30%": 0.8238491882719682, "calibration/coverage@5%": 0.20049017071840577, "calibration/ece": 0.10645703859292935, "calibration/mean_confidence": 0.580155255120576, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009201388888888884, "completions/max_length": 3886.6, "completions/max_terminated_length": 3886.6, "completions/mean_length": 938.2537353515625, "completions/mean_terminated_length": 946.9867431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 288.8, "epoch": 0.5639929500881239, "grad_norm": 0.0031697500962764025, "learning_rate": 4.837740384615385e-06, "loss": -0.0189, "num_tokens": 498712453.0, "reward": 0.9915923118591309, "reward_std": 0.12465869039297103, "rewards/accuracy_reward": 0.6875, "rewards/brier_reward": 0.8318912744522095, "rewards/confidence_uniqueness_reward": 0.9377104878425598, "rewards/format_reward": 0.9907118082046509, "rewards/frontier_coverage_0": 0.05437804870307446, "rewards/frontier_coverage_1": 0.05437804870307446, "rewards/frontier_coverage_10": 0.05576707310974598, "rewards/frontier_coverage_15": 0.06549909114837646, "rewards/frontier_coverage_20": 0.1282554194331169, "rewards/frontier_coverage_25": 0.2169576346874237, "rewards/frontier_coverage_5": 0.05437805764377117, "rewards/frontier_entropy_batch_reward": -0.33477243185043337, "signal/accuracy_reward/centered_abs_mean": 0.14432508647441863, "signal/accuracy_reward/group_std_mean": 0.19657641947269439, "signal/accuracy_reward/group_zero_std_frac": 0.41666666865348817, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9984345197677612, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07216254323720932, "signal/advantage_abs_mean": 0.7449512004852294, "signal/advantage_pre_scale_abs_mean": 0.09156662523746491, "signal/advantage_pre_scale_std": 0.14934734106063843, "signal/advantage_std": 0.9831010937690735, "signal/brier_reward/centered_abs_mean": 0.1267389699816704, "signal/brier_reward/group_std_mean": 0.1676239401102066, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17560543715953827, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012673897296190261, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029656323418021203, "signal/confidence_uniqueness_reward/group_std_mean": 0.04549751281738281, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041109825298190114, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029656322207301856, "signal/format_reward/centered_abs_mean": 0.01534830741584301, "signal/format_reward/group_std_mean": 0.02814323566854, "signal/format_reward/group_zero_std_frac": 0.8861111283302308, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10632295608520508, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007674153707921505, "signal/frontier_coverage_0/centered_abs_mean": 0.16365084946155548, "signal/frontier_coverage_0/group_std_mean": 0.21851195394992828, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032391490787267684, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002340207155793905, "signal/frontier_coverage_1/centered_abs_mean": 0.16365084946155548, "signal/frontier_coverage_1/group_std_mean": 0.21851195394992828, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032391490787267684, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002340207155793905, "signal/frontier_coverage_10/centered_abs_mean": 0.13863745629787444, "signal/frontier_coverage_10/group_std_mean": 0.1869216591119766, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027440791577100755, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019825156312435867, "signal/frontier_coverage_15/centered_abs_mean": 0.06753401160240173, "signal/frontier_coverage_15/group_std_mean": 0.0844781219959259, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013386444002389909, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009657363407313824, "signal/frontier_coverage_20/centered_abs_mean": 0.09699487835168838, "signal/frontier_coverage_20/group_std_mean": 0.12251366078853607, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019232844188809394, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001387026789598167, "signal/frontier_coverage_25/centered_abs_mean": 0.1444738209247589, "signal/frontier_coverage_25/group_std_mean": 0.18456913232803346, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02863982766866684, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002065975684672594, "signal/frontier_coverage_5/centered_abs_mean": 0.16365076303482057, "signal/frontier_coverage_5/group_std_mean": 0.2185118556022644, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032391472905874255, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002340205991640687, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35011342763900755, "signal/frontier_entropy_batch_reward/group_std_mean": 0.41561758518218994, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48549712300300596, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03501134365797043, "step": 235 }, { "calibration/aurc": 0.14805087430462194, "calibration/batch_distribution_entropy": 0.9523826885854355, "calibration/buffer_distribution_entropy": 0.9767688978356597, "calibration/confidence_entropy": 0.4784020873564082, "calibration/coverage@0%": 0.05123674052686704, "calibration/coverage@1%": 0.07832007386020037, "calibration/coverage@10%": 0.43130871841538615, "calibration/coverage@15%": 0.6591521632886991, "calibration/coverage@20%": 0.7477753048857446, "calibration/coverage@25%": 0.8091120104366313, "calibration/coverage@30%": 0.8810251139219194, "calibration/coverage@5%": 0.21753699557956985, "calibration/ece": 0.17466656035166012, "calibration/mean_confidence": 0.5136080409437115, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004166666666666652, "completions/max_length": 3899.2, "completions/max_terminated_length": 3899.2, "completions/mean_length": 1022.4588745117187, "completions/mean_terminated_length": 1026.7896850585937, "completions/min_length": 0.0, "completions/min_terminated_length": 307.6, "epoch": 0.5759928000899989, "grad_norm": 0.0034370513167232275, "learning_rate": 4.807692307692308e-06, "loss": -0.0014, "num_tokens": 513614667.0, "reward": 1.0006279349327087, "reward_std": 0.11839591711759567, "rewards/accuracy_reward": 0.6960069417953492, "rewards/brier_reward": 0.8194296598434448, "rewards/confidence_uniqueness_reward": 0.9458336114883423, "rewards/format_reward": 0.9958333253860474, "rewards/frontier_coverage_0": 0.033486737415660175, "rewards/frontier_coverage_1": 0.033486737415660175, "rewards/frontier_coverage_10": 0.03658188153058291, "rewards/frontier_coverage_15": 0.05663715898990631, "rewards/frontier_coverage_20": 0.11354580670595169, "rewards/frontier_coverage_25": 0.19528359770774842, "rewards/frontier_coverage_5": 0.03349523107754067, "rewards/frontier_entropy_batch_reward": -0.2900451928377151, "signal/accuracy_reward/centered_abs_mean": 0.15009765625, "signal/accuracy_reward/group_std_mean": 0.19873380959033965, "signal/accuracy_reward/group_zero_std_frac": 0.43055555820465086, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0312488436698914, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.075048828125, "signal/advantage_abs_mean": 0.7495712041854858, "signal/advantage_pre_scale_abs_mean": 0.08905573338270187, "signal/advantage_pre_scale_std": 0.1401791453361511, "signal/advantage_std": 0.9831058859825135, "signal/brier_reward/centered_abs_mean": 0.13217740058898925, "signal/brier_reward/group_std_mean": 0.17042210102081298, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18192693293094636, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013217740133404732, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0206814207136631, "signal/confidence_uniqueness_reward/group_std_mean": 0.032660214602947234, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02828650362789631, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020681420573964715, "signal/format_reward/centered_abs_mean": 0.0074761285330168905, "signal/format_reward/group_std_mean": 0.01654504146426916, "signal/format_reward/group_zero_std_frac": 0.9222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.050303632486611606, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0037380642665084452, "signal/frontier_coverage_0/centered_abs_mean": 0.19069683253765107, "signal/frontier_coverage_0/group_std_mean": 0.24695312976837158, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037591222673654556, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027269646525382996, "signal/frontier_coverage_1/centered_abs_mean": 0.19069683253765107, "signal/frontier_coverage_1/group_std_mean": 0.24695312976837158, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037591222673654556, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027269646525382996, "signal/frontier_coverage_10/centered_abs_mean": 0.15149562060832977, "signal/frontier_coverage_10/group_std_mean": 0.19783547520637512, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02986070066690445, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0021663873456418516, "signal/frontier_coverage_15/centered_abs_mean": 0.06695376113057136, "signal/frontier_coverage_15/group_std_mean": 0.08413880467414855, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013179291039705276, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009574387571774423, "signal/frontier_coverage_20/centered_abs_mean": 0.09091886132955551, "signal/frontier_coverage_20/group_std_mean": 0.11494339257478714, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01790587417781353, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013001396786421538, "signal/frontier_coverage_25/centered_abs_mean": 0.13444166928529738, "signal/frontier_coverage_25/group_std_mean": 0.1717398762702942, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026477331668138503, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001922515919432044, "signal/frontier_coverage_5/centered_abs_mean": 0.19062730073928832, "signal/frontier_coverage_5/group_std_mean": 0.24686557352542876, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037577494978904724, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027259701397269963, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33337036371231077, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4049807250499725, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4597449839115143, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03333703950047493, "step": 240 }, { "calibration/aurc": 0.16900973620820287, "calibration/batch_distribution_entropy": 0.9771511357937541, "calibration/buffer_distribution_entropy": 0.9782663947673533, "calibration/confidence_entropy": 0.4753566342042407, "calibration/coverage@0%": 0.03663830715532286, "calibration/coverage@1%": 0.03663830715532286, "calibration/coverage@10%": 0.4480230148342059, "calibration/coverage@15%": 0.5379090314136126, "calibration/coverage@20%": 0.6642861038394414, "calibration/coverage@25%": 0.7254417539267015, "calibration/coverage@30%": 0.8279804755671902, "calibration/coverage@5%": 0.25915139616055843, "calibration/ece": 0.15252473465314134, "calibration/mean_confidence": 0.5344061439463352, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3985.6, "completions/max_terminated_length": 3985.6, "completions/mean_length": 1053.7048828125, "completions/mean_terminated_length": 1057.9298950195312, "completions/min_length": 0.0, "completions/min_terminated_length": 314.8, "epoch": 0.5879926500918738, "grad_norm": 0.0036573780234903097, "learning_rate": 4.777644230769231e-06, "loss": -0.0107, "num_tokens": 528848419.0, "reward": 1.0006993889808655, "reward_std": 0.11764541864395142, "rewards/accuracy_reward": 0.6928819417953491, "rewards/brier_reward": 0.8288188457489014, "rewards/confidence_uniqueness_reward": 0.9452489256858826, "rewards/format_reward": 0.9959201335906982, "rewards/frontier_coverage_0": 0.03839828912168741, "rewards/frontier_coverage_1": 0.03839828912168741, "rewards/frontier_coverage_10": 0.04134276360273361, "rewards/frontier_coverage_15": 0.06121814027428627, "rewards/frontier_coverage_20": 0.12238069325685501, "rewards/frontier_coverage_25": 0.20530767738819122, "rewards/frontier_coverage_5": 0.038481189869344234, "rewards/frontier_entropy_batch_reward": -0.2890947461128235, "signal/accuracy_reward/centered_abs_mean": 0.14274088740348817, "signal/accuracy_reward/group_std_mean": 0.1888882637023926, "signal/accuracy_reward/group_zero_std_frac": 0.45833333134651183, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0081943988800048, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07137044370174409, "signal/advantage_abs_mean": 0.7547778844833374, "signal/advantage_pre_scale_abs_mean": 0.0882973000407219, "signal/advantage_pre_scale_std": 0.14071423560380936, "signal/advantage_std": 0.9830685734748841, "signal/brier_reward/centered_abs_mean": 0.12028724402189254, "signal/brier_reward/group_std_mean": 0.15494127571582794, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17019274830818176, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01202872470021248, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020508787035942076, "signal/confidence_uniqueness_reward/group_std_mean": 0.03213195875287056, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028992549702525138, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020508787129074335, "signal/format_reward/centered_abs_mean": 0.00753580741584301, "signal/format_reward/group_std_mean": 0.016222146898508073, "signal/format_reward/group_zero_std_frac": 0.9277777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05308457799255848, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003767903707921505, "signal/frontier_coverage_0/centered_abs_mean": 0.16301291882991792, "signal/frontier_coverage_0/group_std_mean": 0.2091508388519287, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03296785391867161, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002331084804609418, "signal/frontier_coverage_1/centered_abs_mean": 0.16301291882991792, "signal/frontier_coverage_1/group_std_mean": 0.2091508388519287, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03296785391867161, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002331084804609418, "signal/frontier_coverage_10/centered_abs_mean": 0.12608958184719085, "signal/frontier_coverage_10/group_std_mean": 0.16320188641548156, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.025517260655760765, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018030810402706265, "signal/frontier_coverage_15/centered_abs_mean": 0.06297002360224724, "signal/frontier_coverage_15/group_std_mean": 0.07902814149856567, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0127723790705204, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009004713268950581, "signal/frontier_coverage_20/centered_abs_mean": 0.09279530793428421, "signal/frontier_coverage_20/group_std_mean": 0.11791681945323944, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0188205661252141, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013269728980958462, "signal/frontier_coverage_25/centered_abs_mean": 0.13788617104291917, "signal/frontier_coverage_25/group_std_mean": 0.17627765834331513, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02795053906738758, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001971772196702659, "signal/frontier_coverage_5/centered_abs_mean": 0.16277010440826417, "signal/frontier_coverage_5/group_std_mean": 0.2088464915752411, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032919974997639656, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023276124149560927, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3240995168685913, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3907240152359009, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4595638155937195, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03240995183587074, "step": 245 }, { "calibration/aurc": 0.19836911869531343, "calibration/batch_distribution_entropy": 0.9675209434010308, "calibration/buffer_distribution_entropy": 0.9786828224775217, "calibration/confidence_entropy": 0.4952140725739983, "calibration/coverage@0%": 0.05794984769364665, "calibration/coverage@1%": 0.08040415578764143, "calibration/coverage@10%": 0.28509573542210614, "calibration/coverage@15%": 0.4285329634464752, "calibration/coverage@20%": 0.5708333333333334, "calibration/coverage@25%": 0.7375, "calibration/coverage@30%": 0.7755208333333333, "calibration/coverage@5%": 0.17335454743255005, "calibration/ece": 0.22776192901028072, "calibration/mean_confidence": 0.5247643825024477, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004861111111111138, "completions/max_length": 3758.6, "completions/max_terminated_length": 3758.6, "completions/mean_length": 1066.9283203125, "completions/mean_terminated_length": 1072.1962158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 466.8, "epoch": 0.5999925000937488, "grad_norm": 0.0039780340157449245, "learning_rate": 4.747596153846154e-06, "loss": -0.0083, "num_tokens": 544243433.0, "reward": 0.9987628102302551, "reward_std": 0.11265359967947006, "rewards/accuracy_reward": 0.6972222328186035, "rewards/brier_reward": 0.8149329781532287, "rewards/confidence_uniqueness_reward": 0.9435397028923035, "rewards/format_reward": 0.9951388835906982, "rewards/frontier_coverage_0": 0.030471760779619217, "rewards/frontier_coverage_1": 0.030471760779619217, "rewards/frontier_coverage_10": 0.032759527862071994, "rewards/frontier_coverage_15": 0.06044907793402672, "rewards/frontier_coverage_20": 0.1200255960226059, "rewards/frontier_coverage_25": 0.19917434453964233, "rewards/frontier_coverage_5": 0.030477907881140708, "rewards/frontier_entropy_batch_reward": -0.3046976327896118, "signal/accuracy_reward/centered_abs_mean": 0.13567708283662797, "signal/accuracy_reward/group_std_mean": 0.176465305685997, "signal/accuracy_reward/group_zero_std_frac": 0.5000000119209289, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0347381114959717, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06783854141831398, "signal/advantage_abs_mean": 0.762245523929596, "signal/advantage_pre_scale_abs_mean": 0.0862567737698555, "signal/advantage_pre_scale_std": 0.1369766414165497, "signal/advantage_std": 0.982957637310028, "signal/brier_reward/centered_abs_mean": 0.12140593230724335, "signal/brier_reward/group_std_mean": 0.15745844841003417, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1853317677974701, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012140593118965625, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0218271866440773, "signal/confidence_uniqueness_reward/group_std_mean": 0.03432033360004425, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03319373317062855, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021827186457812784, "signal/format_reward/centered_abs_mean": 0.008181423833593725, "signal/format_reward/group_std_mean": 0.017716386914253236, "signal/format_reward/group_zero_std_frac": 0.9166666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0609793234616518, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004090711916796863, "signal/frontier_coverage_0/centered_abs_mean": 0.1704305589199066, "signal/frontier_coverage_0/group_std_mean": 0.22056526243686675, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03730859383940697, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024371568579226732, "signal/frontier_coverage_1/centered_abs_mean": 0.1704305589199066, "signal/frontier_coverage_1/group_std_mean": 0.22056526243686675, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03730859383940697, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024371568579226732, "signal/frontier_coverage_10/centered_abs_mean": 0.1306050345301628, "signal/frontier_coverage_10/group_std_mean": 0.16988409161567689, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.028580862656235696, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018676520558074117, "signal/frontier_coverage_15/centered_abs_mean": 0.06310615763068199, "signal/frontier_coverage_15/group_std_mean": 0.07897710651159287, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013806315325200557, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009024180588312447, "signal/frontier_coverage_20/centered_abs_mean": 0.08676007241010666, "signal/frontier_coverage_20/group_std_mean": 0.10938064604997635, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018959224969148637, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012406690511852503, "signal/frontier_coverage_25/centered_abs_mean": 0.12573317885398866, "signal/frontier_coverage_25/group_std_mean": 0.1593530297279358, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027454627305269243, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017979845171794296, "signal/frontier_coverage_5/centered_abs_mean": 0.17006706297397614, "signal/frontier_coverage_5/group_std_mean": 0.22009154856204988, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037230221554636955, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002431959193199873, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32585279941558837, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39470202326774595, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4988634824752808, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03258528187870979, "step": 250 }, { "epoch": 0.5999925000937488, "eval_calibration/aurc": 0.1385071168629361, "eval_calibration/batch_distribution_entropy": 0.9366491473344589, "eval_calibration/buffer_distribution_entropy": 0.9790936212740405, "eval_calibration/confidence_entropy": 0.4766008180966541, "eval_calibration/coverage@0%": 0.3082997311827957, "eval_calibration/coverage@1%": 0.3082997311827957, "eval_calibration/coverage@10%": 0.5120967741935484, "eval_calibration/coverage@15%": 0.597614247311828, "eval_calibration/coverage@20%": 0.760752688172043, "eval_calibration/coverage@25%": 0.8610551075268816, "eval_calibration/coverage@30%": 0.946236559139785, "eval_calibration/coverage@5%": 0.3082997311827957, "eval_calibration/ece": 0.25818358534946234, "eval_calibration/mean_confidence": 0.5132839549731183, "eval_completions/clipped_ratio": 0.006944444444444457, "eval_completions/max_length": 3228.1666666666665, "eval_completions/max_terminated_length": 3228.1666666666665, "eval_completions/mean_length": 1050.439961751302, "eval_completions/mean_terminated_length": 1057.86083984375, "eval_completions/min_length": 93.66666666666667, "eval_completions/min_terminated_length": 480.6666666666667, "eval_loss": 0.0, "eval_num_tokens": 544243433.0, "eval_reward": 0.9110045929749807, "eval_reward_std": 0.2316128040353457, "eval_rewards/accuracy_reward": 0.6814236144224802, "eval_rewards/brier_reward": 0.7850200235843658, "eval_rewards/confidence_uniqueness_reward": 0.8943492472171783, "eval_rewards/format_reward": 0.9921875099341074, "eval_rewards/frontier_coverage_0": 0.014424265439932546, "eval_rewards/frontier_coverage_1": 0.014424265439932546, "eval_rewards/frontier_coverage_10": 0.018757762853056192, "eval_rewards/frontier_coverage_15": 0.05081125907599926, "eval_rewards/frontier_coverage_20": 0.10140267262856166, "eval_rewards/frontier_coverage_25": 0.16897361477216086, "eval_rewards/frontier_coverage_5": 0.014481973213454088, "eval_rewards/frontier_entropy_batch_reward": -0.9921875099341074, "eval_runtime": 211.4128, "eval_samples_per_second": 4.73, "eval_signal/accuracy_reward/centered_abs_mean": 0.4216037342945735, "eval_signal/accuracy_reward/group_std_mean": 0.46546362340450287, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9207923909028372, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21080186714728674, "eval_signal/advantage_abs_mean": 0.8664587438106537, "eval_signal/advantage_pre_scale_abs_mean": 0.20114790399869284, "eval_signal/advantage_pre_scale_std": 0.2299346203605334, "eval_signal/advantage_std": 0.9863927960395813, "eval_signal/brier_reward/centered_abs_mean": 0.20396561920642853, "eval_signal/brier_reward/group_std_mean": 0.25821878264347714, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08907666057348251, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.020396563224494457, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045910464599728584, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07307459662357967, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020038395809630554, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004591046599671245, "eval_signal/format_reward/centered_abs_mean": 0.015136718439559141, "eval_signal/format_reward/group_std_mean": 0.044194173688689865, "eval_signal/format_reward/group_zero_std_frac": 0.7500000298023224, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.03261481939504544, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359219779571, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.3264067123333613, "eval_signal/frontier_coverage_0/group_std_mean": 0.4418923109769821, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.020408068783581257, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0046676161388556165, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.3264067123333613, "eval_signal/frontier_coverage_1/group_std_mean": 0.4418923109769821, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.020408068783581257, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0046676161388556165, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.24455400804678598, "eval_signal/frontier_coverage_10/group_std_mean": 0.3390832841396332, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015295245063801607, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034971223988880715, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.08623725920915604, "eval_signal/frontier_coverage_15/group_std_mean": 0.10872507840394974, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005391905394693215, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012331928010098636, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.13774426033099493, "eval_signal/frontier_coverage_20/group_std_mean": 0.17580651740233103, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.008605041385938724, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019697428409320614, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.22903726249933243, "eval_signal/frontier_coverage_25/group_std_mean": 0.2837483336528142, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01430831989273429, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00327523285523057, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.325265496969223, "eval_signal/frontier_coverage_5/group_std_mean": 0.4404994646708171, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02033673506230116, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0046512965733806295, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.015136718439559141, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.044194173688689865, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7500000298023224, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.006522963910053174, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0015136719254466395, "eval_steps_per_second": 0.028, "step": 250 }, { "epoch": 0.5999925000937488, "step": 250, "train_probe_calibration/aurc": 0.14293172797978523, "train_probe_calibration/batch_distribution_entropy": 0.9377545299480441, "train_probe_calibration/buffer_distribution_entropy": 0.9791906702728134, "train_probe_calibration/confidence_entropy": 0.46180745643026194, "train_probe_calibration/coverage@0%": 0.19027777777777777, "train_probe_calibration/coverage@1%": 0.19027777777777777, "train_probe_calibration/coverage@10%": 0.4875, "train_probe_calibration/coverage@15%": 0.68125, "train_probe_calibration/coverage@20%": 0.8065972222222223, "train_probe_calibration/coverage@25%": 0.9270833333333334, "train_probe_calibration/coverage@30%": 0.984375, "train_probe_calibration/coverage@5%": 0.2569444444444444, "train_probe_calibration/ece": 0.2584190625, "train_probe_calibration/mean_confidence": 0.5207807291666667, "train_probe_completions/clipped_ratio": 0.00434027777777779, "train_probe_completions/max_length": 3085.3333333333335, "train_probe_completions/max_terminated_length": 3085.3333333333335, "train_probe_completions/mean_length": 1064.1022847493489, "train_probe_completions/mean_terminated_length": 1068.6518046061199, "train_probe_completions/min_length": 169.5, "train_probe_completions/min_terminated_length": 459.3333333333333, "train_probe_loss": 0.0, "train_probe_num_tokens": 544243433.0, "train_probe_reward": 0.9264779885609945, "train_probe_reward_std": 0.22200091928243637, "train_probe_rewards/accuracy_reward": 0.7013888855775198, "train_probe_rewards/brier_reward": 0.8103241423765818, "train_probe_rewards/confidence_uniqueness_reward": 0.8954811990261078, "train_probe_rewards/format_reward": 0.9956597288449606, "train_probe_rewards/frontier_coverage_0": 0.024293637834489346, "train_probe_rewards/frontier_coverage_1": 0.024293637834489346, "train_probe_rewards/frontier_coverage_10": 0.03029835526831448, "train_probe_rewards/frontier_coverage_15": 0.06300980473558108, "train_probe_rewards/frontier_coverage_20": 0.12049084653457005, "train_probe_rewards/frontier_coverage_25": 0.1985230545202891, "train_probe_rewards/frontier_coverage_5": 0.024340201790134113, "train_probe_rewards/frontier_entropy_batch_reward": -0.9956597288449606, "train_probe_runtime": 197.6873, "train_probe_samples_per_second": 5.058, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4080946147441864, "train_probe_signal/accuracy_reward/group_std_mean": 0.457661638657252, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.930666039387385, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.2040473073720932, "train_probe_signal/advantage_abs_mean": 0.8634511530399323, "train_probe_signal/advantage_pre_scale_abs_mean": 0.19239966322978339, "train_probe_signal/advantage_pre_scale_std": 0.22050043443838754, "train_probe_signal/advantage_std": 0.9863737424214681, "train_probe_signal/brier_reward/centered_abs_mean": 0.18432209392388663, "train_probe_signal/brier_reward/group_std_mean": 0.237020214398702, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08412475387255351, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01843220926821232, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045412225648760796, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.062134902303417526, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02067517675459385, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045412226269642515, "train_probe_signal/format_reward/centered_abs_mean": 0.008300781094779571, "train_probe_signal/format_reward/group_std_mean": 0.021562910017867882, "train_probe_signal/format_reward/group_zero_std_frac": 0.8888889153798422, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018439628494282562, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004150390547389786, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.31929043928782147, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.4300284336010615, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.020870385070641834, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004565853159874678, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.31929043928782147, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.4300284336010615, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.020870385070641834, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004565853159874678, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.23945064842700958, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.32852159440517426, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015652922447770834, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0034241442335769534, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.08496188372373581, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.10551841805378596, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005550025108580788, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012149549438618124, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.14249480267365774, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.1771022950609525, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009304065412531296, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020376756826105216, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.23307730754216513, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.28281400601069134, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015215486288070679, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003333005510891477, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.31822994848092395, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.4287427266438802, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.020800404871503513, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00455068820156157, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008300781094779571, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.021562910017867882, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8888889153798422, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.003687925481547912, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008300781094779571, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.15048834529617455, "calibration/batch_distribution_entropy": 0.9338231344693714, "calibration/buffer_distribution_entropy": 0.9794221749818541, "calibration/confidence_entropy": 0.4710076533233304, "calibration/coverage@0%": 0.02302989293558958, "calibration/coverage@1%": 0.02302989293558958, "calibration/coverage@10%": 0.5242672934797624, "calibration/coverage@15%": 0.6491196556844868, "calibration/coverage@20%": 0.6915409155270065, "calibration/coverage@25%": 0.8355510084265783, "calibration/coverage@30%": 0.897499654648432, "calibration/coverage@5%": 0.27028120183611315, "calibration/ece": 0.14773781862187638, "calibration/mean_confidence": 0.6141524500746167, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006336805555555558, "completions/max_length": 3891.2, "completions/max_terminated_length": 3891.2, "completions/mean_length": 1071.4462036132813, "completions/mean_terminated_length": 1078.306103515625, "completions/min_length": 0.0, "completions/min_terminated_length": 387.2, "epoch": 0.6119923500956238, "grad_norm": 0.003938134294003248, "learning_rate": 4.7175480769230775e-06, "loss": -0.0119, "num_tokens": 559683901.0, "reward": 0.991357171535492, "reward_std": 0.11556299179792404, "rewards/accuracy_reward": 0.6925347328186036, "rewards/brier_reward": 0.812757420539856, "rewards/confidence_uniqueness_reward": 0.939787495136261, "rewards/format_reward": 0.9936631917953491, "rewards/frontier_coverage_0": 0.02287477208301425, "rewards/frontier_coverage_1": 0.02287477208301425, "rewards/frontier_coverage_10": 0.027250152826309205, "rewards/frontier_coverage_15": 0.0626752346754074, "rewards/frontier_coverage_20": 0.12636226266622544, "rewards/frontier_coverage_25": 0.2056520938873291, "rewards/frontier_coverage_5": 0.022994631039910018, "rewards/frontier_entropy_batch_reward": -0.34013040363788605, "signal/accuracy_reward/centered_abs_mean": 0.1337456613779068, "signal/accuracy_reward/group_std_mean": 0.18114876449108125, "signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.009685182571411, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0668728306889534, "signal/advantage_abs_mean": 0.7468565940856934, "signal/advantage_pre_scale_abs_mean": 0.08542105257511139, "signal/advantage_pre_scale_std": 0.14232320189476014, "signal/advantage_std": 0.9829666376113891, "signal/brier_reward/centered_abs_mean": 0.12079736739397048, "signal/brier_reward/group_std_mean": 0.1556779623031616, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1833444505929947, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012079737335443496, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025875761359930038, "signal/confidence_uniqueness_reward/group_std_mean": 0.03979103080928326, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03993023969233036, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025875761872157454, "signal/format_reward/centered_abs_mean": 0.011170790065079927, "signal/format_reward/group_std_mean": 0.02186266928911209, "signal/format_reward/group_zero_std_frac": 0.9083333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08671931773424149, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005585395032539964, "signal/frontier_coverage_0/centered_abs_mean": 0.15731475353240967, "signal/frontier_coverage_0/group_std_mean": 0.2038748413324356, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033884177729487416, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022496009478345513, "signal/frontier_coverage_1/centered_abs_mean": 0.15731475353240967, "signal/frontier_coverage_1/group_std_mean": 0.2038748413324356, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033884177729487416, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022496009478345513, "signal/frontier_coverage_10/centered_abs_mean": 0.1202172502875328, "signal/frontier_coverage_10/group_std_mean": 0.15625946074724198, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02589981146156788, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001719106617383659, "signal/frontier_coverage_15/centered_abs_mean": 0.06376026198267937, "signal/frontier_coverage_15/group_std_mean": 0.07974331229925155, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013887671194970608, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009117717272602022, "signal/frontier_coverage_20/centered_abs_mean": 0.09358802139759063, "signal/frontier_coverage_20/group_std_mean": 0.1184041753411293, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020462489500641824, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013383086305111646, "signal/frontier_coverage_25/centered_abs_mean": 0.13497862517833709, "signal/frontier_coverage_25/group_std_mean": 0.17217137515544892, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02949381247162819, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019301943248137832, "signal/frontier_coverage_5/centered_abs_mean": 0.1568644642829895, "signal/frontier_coverage_5/group_std_mean": 0.2033059686422348, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03378809839487076, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002243161806836724, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3343395471572876, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40089446902275083, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5089798927307129, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033433955162763596, "step": 255 }, { "calibration/aurc": 0.10255353201496739, "calibration/batch_distribution_entropy": 0.9639092103771155, "calibration/buffer_distribution_entropy": 0.9797833697904046, "calibration/confidence_entropy": 0.49060049661677196, "calibration/coverage@0%": 0.10018920337024192, "calibration/coverage@1%": 0.1544279435791192, "calibration/coverage@10%": 0.5559179569224634, "calibration/coverage@15%": 0.7529552754007864, "calibration/coverage@20%": 0.8581241780108335, "calibration/coverage@25%": 0.9133015753791163, "calibration/coverage@30%": 0.9649038163370701, "calibration/coverage@5%": 0.4035025552714583, "calibration/ece": 0.17171925216525696, "calibration/mean_confidence": 0.5537974322635033, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01137152777777779, "completions/max_length": 3994.8, "completions/max_terminated_length": 3994.8, "completions/mean_length": 1101.7650268554687, "completions/mean_terminated_length": 1114.241162109375, "completions/min_length": 0.0, "completions/min_terminated_length": 329.8, "epoch": 0.6239922000974988, "grad_norm": 0.0035454921890050173, "learning_rate": 4.6875000000000004e-06, "loss": -0.0275, "num_tokens": 575475466.0, "reward": 1.003694999217987, "reward_std": 0.12208217233419419, "rewards/accuracy_reward": 0.7048611164093017, "rewards/brier_reward": 0.8128533840179444, "rewards/confidence_uniqueness_reward": 0.9410479426383972, "rewards/format_reward": 0.9886284589767456, "rewards/frontier_coverage_0": 0.022731726244091987, "rewards/frontier_coverage_1": 0.022731726244091987, "rewards/frontier_coverage_10": 0.031263113394379614, "rewards/frontier_coverage_15": 0.06570351123809814, "rewards/frontier_coverage_20": 0.12733635306358337, "rewards/frontier_coverage_25": 0.20722153186798095, "rewards/frontier_coverage_5": 0.02290651835501194, "rewards/frontier_entropy_batch_reward": -0.2558844447135925, "signal/accuracy_reward/centered_abs_mean": 0.13758680671453477, "signal/accuracy_reward/group_std_mean": 0.18783430457115174, "signal/accuracy_reward/group_zero_std_frac": 0.44166666865348814, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0014619827270508, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06879340335726739, "signal/advantage_abs_mean": 0.7372382164001465, "signal/advantage_pre_scale_abs_mean": 0.08913364708423614, "signal/advantage_pre_scale_std": 0.14821802377700805, "signal/advantage_std": 0.9830303311347961, "signal/brier_reward/centered_abs_mean": 0.13460603803396226, "signal/brier_reward/group_std_mean": 0.17414171397686004, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1958859771490097, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013460604101419449, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02883324772119522, "signal/confidence_uniqueness_reward/group_std_mean": 0.04651344493031502, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.042122557386755945, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002883324818685651, "signal/format_reward/centered_abs_mean": 0.018267144076526166, "signal/format_reward/group_std_mean": 0.03373241238296032, "signal/format_reward/group_zero_std_frac": 0.8611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.13340308517217636, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.009133572038263083, "signal/frontier_coverage_0/centered_abs_mean": 0.18028615415096283, "signal/frontier_coverage_0/group_std_mean": 0.2340587854385376, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03751359954476356, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025780918542295693, "signal/frontier_coverage_1/centered_abs_mean": 0.18028615415096283, "signal/frontier_coverage_1/group_std_mean": 0.2340587854385376, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03751359954476356, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025780918542295693, "signal/frontier_coverage_10/centered_abs_mean": 0.1330754965543747, "signal/frontier_coverage_10/group_std_mean": 0.1743028372526169, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.027698947116732597, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0019029794726520776, "signal/frontier_coverage_15/centered_abs_mean": 0.0681751549243927, "signal/frontier_coverage_15/group_std_mean": 0.08557239919900894, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014194411225616932, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009749047341756523, "signal/frontier_coverage_20/centered_abs_mean": 0.09594286382198333, "signal/frontier_coverage_20/group_std_mean": 0.12045165747404099, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019987112656235696, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013719829730689526, "signal/frontier_coverage_25/centered_abs_mean": 0.1361988067626953, "signal/frontier_coverage_25/group_std_mean": 0.17252913117408752, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028375216573476792, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019476428860798478, "signal/frontier_coverage_5/centered_abs_mean": 0.17972335517406463, "signal/frontier_coverage_5/group_std_mean": 0.23334594070911407, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037396402657032014, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025700438302010296, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30619403123855593, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3734066069126129, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4467845559120178, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03061940483748913, "step": 260 }, { "calibration/aurc": 0.12449450690978159, "calibration/batch_distribution_entropy": 0.9773936708097445, "calibration/buffer_distribution_entropy": 0.9805711500932638, "calibration/confidence_entropy": 0.48593347695128186, "calibration/coverage@0%": 0.08784057472010717, "calibration/coverage@1%": 0.08784057472010717, "calibration/coverage@10%": 0.587357013439868, "calibration/coverage@15%": 0.6641879363001746, "calibration/coverage@20%": 0.778266797556719, "calibration/coverage@25%": 0.8677356020942408, "calibration/coverage@30%": 0.8980339223385689, "calibration/coverage@5%": 0.35331105411489167, "calibration/ece": 0.18071845532152359, "calibration/mean_confidence": 0.555744081252791, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00894097222222221, "completions/max_length": 3896.4, "completions/max_terminated_length": 3896.4, "completions/mean_length": 1038.3904541015625, "completions/mean_terminated_length": 1047.8605834960938, "completions/min_length": 0.0, "completions/min_terminated_length": 294.6, "epoch": 0.6359920500993738, "grad_norm": 0.0030857024248689413, "learning_rate": 4.657451923076923e-06, "loss": -0.0334, "num_tokens": 590507548.0, "reward": 1.000011420249939, "reward_std": 0.11584964096546173, "rewards/accuracy_reward": 0.6975694417953491, "rewards/brier_reward": 0.8233382225036621, "rewards/confidence_uniqueness_reward": 0.9414842247962951, "rewards/format_reward": 0.9910590410232544, "rewards/frontier_coverage_0": 0.034576449729502204, "rewards/frontier_coverage_1": 0.034576449729502204, "rewards/frontier_coverage_10": 0.04155855402350426, "rewards/frontier_coverage_15": 0.07075799554586411, "rewards/frontier_coverage_20": 0.1364063397049904, "rewards/frontier_coverage_25": 0.2174540102481842, "rewards/frontier_coverage_5": 0.03471278678625822, "rewards/frontier_entropy_batch_reward": -0.2893666684627533, "signal/accuracy_reward/centered_abs_mean": 0.1317057266831398, "signal/accuracy_reward/group_std_mean": 0.17435995638370513, "signal/accuracy_reward/group_zero_std_frac": 0.5027777969837188, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9813842535018921, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0658528633415699, "signal/advantage_abs_mean": 0.7615089535713195, "signal/advantage_pre_scale_abs_mean": 0.08686984926462174, "signal/advantage_pre_scale_std": 0.14367577582597732, "signal/advantage_std": 0.9829856991767884, "signal/brier_reward/centered_abs_mean": 0.12998353093862533, "signal/brier_reward/group_std_mean": 0.16700247824192047, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19465568661689758, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012998353131115437, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.027591006830334663, "signal/confidence_uniqueness_reward/group_std_mean": 0.0436860203742981, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041278140246868135, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027591006364673376, "signal/format_reward/centered_abs_mean": 0.015413411241024733, "signal/format_reward/group_std_mean": 0.0289100106805563, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11416576504707336, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007706705620512367, "signal/frontier_coverage_0/centered_abs_mean": 0.17794593572616577, "signal/frontier_coverage_0/group_std_mean": 0.22968710362911224, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038241232931613925, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002544626779854298, "signal/frontier_coverage_1/centered_abs_mean": 0.17794593572616577, "signal/frontier_coverage_1/group_std_mean": 0.22968710362911224, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038241232931613925, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002544626779854298, "signal/frontier_coverage_10/centered_abs_mean": 0.11253818869590759, "signal/frontier_coverage_10/group_std_mean": 0.1477993905544281, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.024206925183534622, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016092960722744465, "signal/frontier_coverage_15/centered_abs_mean": 0.06968608945608139, "signal/frontier_coverage_15/group_std_mean": 0.0868400439620018, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01497387420386076, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009965110919438302, "signal/frontier_coverage_20/centered_abs_mean": 0.09935293346643448, "signal/frontier_coverage_20/group_std_mean": 0.12432914227247238, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021300822868943213, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014207469765096902, "signal/frontier_coverage_25/centered_abs_mean": 0.1397058293223381, "signal/frontier_coverage_25/group_std_mean": 0.17603937685489654, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029925085604190826, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00199779337272048, "signal/frontier_coverage_5/centered_abs_mean": 0.17707998752593995, "signal/frontier_coverage_5/group_std_mean": 0.22862663865089417, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03805320970714092, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002532243775203824, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3300370931625366, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39845150113105776, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.496671199798584, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03300370946526528, "step": 265 }, { "calibration/aurc": 0.12281177092511357, "calibration/batch_distribution_entropy": 0.9656192498774155, "calibration/buffer_distribution_entropy": 0.9823104864801895, "calibration/confidence_entropy": 0.47402582816426486, "calibration/coverage@0%": 0.03688186664003849, "calibration/coverage@1%": 0.03688186664003849, "calibration/coverage@10%": 0.49601819978997863, "calibration/coverage@15%": 0.6787215172399611, "calibration/coverage@20%": 0.8241145702041397, "calibration/coverage@25%": 0.8999529551348966, "calibration/coverage@30%": 0.9731249559051784, "calibration/coverage@5%": 0.28127246150857715, "calibration/ece": 0.17982517686639335, "calibration/mean_confidence": 0.5375241826053218, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012413194444444442, "completions/max_length": 4045.4, "completions/max_terminated_length": 4045.4, "completions/mean_length": 991.3185913085938, "completions/mean_terminated_length": 1003.8561401367188, "completions/min_length": 0.0, "completions/min_terminated_length": 296.0, "epoch": 0.6479919001012487, "grad_norm": 0.002968569053336978, "learning_rate": 4.627403846153847e-06, "loss": -0.0261, "num_tokens": 605047570.0, "reward": 1.0034834623336792, "reward_std": 0.11915335059165955, "rewards/accuracy_reward": 0.7129340291023254, "rewards/brier_reward": 0.8068234920501709, "rewards/confidence_uniqueness_reward": 0.9387187480926513, "rewards/format_reward": 0.987500011920929, "rewards/frontier_coverage_0": 0.01569048868259415, "rewards/frontier_coverage_1": 0.01569048868259415, "rewards/frontier_coverage_10": 0.03181662876158953, "rewards/frontier_coverage_15": 0.07589756101369857, "rewards/frontier_coverage_20": 0.14337013214826583, "rewards/frontier_coverage_25": 0.2238972157239914, "rewards/frontier_coverage_5": 0.01603688622417394, "rewards/frontier_entropy_batch_reward": -0.2875809371471405, "signal/accuracy_reward/centered_abs_mean": 0.13441297858953477, "signal/accuracy_reward/group_std_mean": 0.17354630529880524, "signal/accuracy_reward/group_zero_std_frac": 0.522222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0317163467407227, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06720648929476739, "signal/advantage_abs_mean": 0.7717400074005127, "signal/advantage_pre_scale_abs_mean": 0.0905812844634056, "signal/advantage_pre_scale_std": 0.15285636186599733, "signal/advantage_std": 0.9829501986503602, "signal/brier_reward/centered_abs_mean": 0.13663897514343262, "signal/brier_reward/group_std_mean": 0.17413173317909242, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.21040893495082855, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013663897477090358, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030733636021614073, "signal/confidence_uniqueness_reward/group_std_mean": 0.04751450791954994, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0473335437476635, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003073363611474633, "signal/format_reward/centered_abs_mean": 0.01868489533662796, "signal/format_reward/group_std_mean": 0.032993590086698533, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.14405927509069444, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00934244766831398, "signal/frontier_coverage_0/centered_abs_mean": 0.1771648645401001, "signal/frontier_coverage_0/group_std_mean": 0.23024407625198365, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03890465572476387, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025334575679153205, "signal/frontier_coverage_1/centered_abs_mean": 0.1771648645401001, "signal/frontier_coverage_1/group_std_mean": 0.23024407625198365, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03890465572476387, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025334575679153205, "signal/frontier_coverage_10/centered_abs_mean": 0.10589916706085205, "signal/frontier_coverage_10/group_std_mean": 0.13918049335479737, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02323727458715439, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001514358026906848, "signal/frontier_coverage_15/centered_abs_mean": 0.0736978754401207, "signal/frontier_coverage_15/group_std_mean": 0.09158166199922561, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01622140742838383, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001053879619576037, "signal/frontier_coverage_20/centered_abs_mean": 0.10349708944559097, "signal/frontier_coverage_20/group_std_mean": 0.13017223328351973, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02279331013560295, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001480008359067142, "signal/frontier_coverage_25/centered_abs_mean": 0.14353952705860137, "signal/frontier_coverage_25/group_std_mean": 0.18179913461208344, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03160831183195114, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020526152104139326, "signal/frontier_coverage_5/centered_abs_mean": 0.17637761533260346, "signal/frontier_coverage_5/group_std_mean": 0.2292585015296936, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03873150199651718, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025221999734640122, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32779831886291505, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39781845808029176, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5039668500423431, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03277983292937279, "step": 270 }, { "calibration/aurc": 0.14417500428320207, "calibration/batch_distribution_entropy": 0.9710154425145918, "calibration/buffer_distribution_entropy": 0.9824627731736181, "calibration/confidence_entropy": 0.4968844676980586, "calibration/coverage@0%": 0.055456773076542884, "calibration/coverage@1%": 0.055456773076542884, "calibration/coverage@10%": 0.43402523337092347, "calibration/coverage@15%": 0.6135070800685314, "calibration/coverage@20%": 0.7761960929383052, "calibration/coverage@25%": 0.872591220602526, "calibration/coverage@30%": 0.9412395657627265, "calibration/coverage@5%": 0.22296472683507887, "calibration/ece": 0.13625054238405615, "calibration/mean_confidence": 0.5763414081845614, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02734375, "completions/max_length": 4069.8, "completions/max_terminated_length": 4069.8, "completions/mean_length": 1096.324658203125, "completions/mean_terminated_length": 1127.08369140625, "completions/min_length": 0.0, "completions/min_terminated_length": 310.4, "epoch": 0.6599917501031237, "grad_norm": 0.0023030159063637257, "learning_rate": 4.597355769230769e-06, "loss": -0.0731, "num_tokens": 620784142.0, "reward": 0.9708781123161316, "reward_std": 0.13851457834243774, "rewards/accuracy_reward": 0.6647569537162781, "rewards/brier_reward": 0.7947103142738342, "rewards/confidence_uniqueness_reward": 0.9273814558982849, "rewards/format_reward": 0.9723958373069763, "rewards/frontier_coverage_0": 0.02882022559642792, "rewards/frontier_coverage_1": 0.028787746839225293, "rewards/frontier_coverage_10": 0.03391992338001728, "rewards/frontier_coverage_15": 0.06893513202667237, "rewards/frontier_coverage_20": 0.12927037924528123, "rewards/frontier_coverage_25": 0.20069342851638794, "rewards/frontier_coverage_5": 0.028869583271443844, "rewards/frontier_entropy_batch_reward": -0.27333354353904726, "signal/accuracy_reward/centered_abs_mean": 0.1373263880610466, "signal/accuracy_reward/group_std_mean": 0.18307480216026306, "signal/accuracy_reward/group_zero_std_frac": 0.4694444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9482576966285705, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0686631940305233, "signal/advantage_abs_mean": 0.7440701246261596, "signal/advantage_pre_scale_abs_mean": 0.10121009647846221, "signal/advantage_pre_scale_std": 0.17259745299816132, "signal/advantage_std": 0.983104145526886, "signal/brier_reward/centered_abs_mean": 0.1418377786874771, "signal/brier_reward/group_std_mean": 0.18231958746910096, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19642621278762817, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014183777570724487, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04775775372982025, "signal/confidence_uniqueness_reward/group_std_mean": 0.07538097202777863, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06636101678013802, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004775775363668799, "signal/format_reward/centered_abs_mean": 0.0403211809694767, "signal/format_reward/group_std_mean": 0.06660629361867905, "signal/format_reward/group_zero_std_frac": 0.75, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.28085810542106626, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02016059048473835, "signal/frontier_coverage_0/centered_abs_mean": 0.18177134394645691, "signal/frontier_coverage_0/group_std_mean": 0.2336914509534836, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03591709956526756, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002599330106750131, "signal/frontier_coverage_1/centered_abs_mean": 0.1816892147064209, "signal/frontier_coverage_1/group_std_mean": 0.23359167873859404, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03590134456753731, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025981557089835407, "signal/frontier_coverage_10/centered_abs_mean": 0.10146680474281311, "signal/frontier_coverage_10/group_std_mean": 0.13296806663274766, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020047903805971146, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001450975309126079, "signal/frontier_coverage_15/centered_abs_mean": 0.06901453286409379, "signal/frontier_coverage_15/group_std_mean": 0.08679485768079757, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013635250180959702, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009869078057818115, "signal/frontier_coverage_20/centered_abs_mean": 0.09845926016569137, "signal/frontier_coverage_20/group_std_mean": 0.1253434956073761, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019467445090413094, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014079674379900097, "signal/frontier_coverage_25/centered_abs_mean": 0.1376819759607315, "signal/frontier_coverage_25/group_std_mean": 0.1762455552816391, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027223770692944526, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019688522443175316, "signal/frontier_coverage_5/centered_abs_mean": 0.18081392645835875, "signal/frontier_coverage_5/group_std_mean": 0.23251225650310517, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03572747558355331, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025856390595436094, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32236793637275696, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39191374778747556, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4449836671352386, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03223679475486278, "step": 275 }, { "calibration/aurc": 0.09641676239838745, "calibration/batch_distribution_entropy": 0.9676579395319906, "calibration/buffer_distribution_entropy": 0.9820208457928654, "calibration/confidence_entropy": 0.5058189262006992, "calibration/coverage@0%": 0.06702250595041062, "calibration/coverage@1%": 0.12578261376712221, "calibration/coverage@10%": 0.6526884191611103, "calibration/coverage@15%": 0.76473341845179, "calibration/coverage@20%": 0.8503245283018869, "calibration/coverage@25%": 0.9002695417789758, "calibration/coverage@30%": 0.9428571428571428, "calibration/coverage@5%": 0.5351988713923174, "calibration/ece": 0.18722123766397122, "calibration/mean_confidence": 0.5592271986784572, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03828125, "completions/max_length": 4062.4, "completions/max_terminated_length": 4062.4, "completions/mean_length": 1125.9796997070312, "completions/mean_terminated_length": 1170.884716796875, "completions/min_length": 0.0, "completions/min_terminated_length": 323.2, "epoch": 0.6719916001049987, "grad_norm": 0.0021780498791486025, "learning_rate": 4.567307692307692e-06, "loss": -0.0884, "num_tokens": 636850724.0, "reward": 0.9621721744537354, "reward_std": 0.15094391703605653, "rewards/accuracy_reward": 0.6716145873069763, "rewards/brier_reward": 0.7638566136360169, "rewards/confidence_uniqueness_reward": 0.9151824951171875, "rewards/format_reward": 0.9613715171813965, "rewards/frontier_coverage_0": -0.0023802617564797402, "rewards/frontier_coverage_1": -0.0023802617564797402, "rewards/frontier_coverage_10": 0.014429821725934744, "rewards/frontier_coverage_15": 0.05858373343944549, "rewards/frontier_coverage_20": 0.11382308453321457, "rewards/frontier_coverage_25": 0.1784544587135315, "rewards/frontier_coverage_5": -0.0021017659455537796, "rewards/frontier_entropy_batch_reward": -0.2735038071870804, "signal/accuracy_reward/centered_abs_mean": 0.14811740517616273, "signal/accuracy_reward/group_std_mean": 0.19783127307891846, "signal/accuracy_reward/group_zero_std_frac": 0.4277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9213737845420837, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07405870258808137, "signal/advantage_abs_mean": 0.747747254371643, "signal/advantage_pre_scale_abs_mean": 0.1122934266924858, "signal/advantage_pre_scale_std": 0.18739549219608306, "signal/advantage_std": 0.9832320809364319, "signal/brier_reward/centered_abs_mean": 0.1451827973127365, "signal/brier_reward/group_std_mean": 0.18546995520591736, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18125930428504944, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014518279768526553, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05821017548441887, "signal/confidence_uniqueness_reward/group_std_mean": 0.0866745539009571, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07207210585474969, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005821018014103174, "signal/format_reward/centered_abs_mean": 0.0506022147834301, "signal/format_reward/group_std_mean": 0.07779108807444572, "signal/format_reward/group_zero_std_frac": 0.7305555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.3118982821702957, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02530110739171505, "signal/frontier_coverage_0/centered_abs_mean": 0.1833457052707672, "signal/frontier_coverage_0/group_std_mean": 0.2372480094432831, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03273410275578499, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002621843572705984, "signal/frontier_coverage_1/centered_abs_mean": 0.1833457052707672, "signal/frontier_coverage_1/group_std_mean": 0.2372480094432831, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03273410275578499, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002621843572705984, "signal/frontier_coverage_10/centered_abs_mean": 0.09884552955627442, "signal/frontier_coverage_10/group_std_mean": 0.1297825500369072, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017642829567193985, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014134910656139255, "signal/frontier_coverage_15/centered_abs_mean": 0.06081449687480926, "signal/frontier_coverage_15/group_std_mean": 0.07752282023429871, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010932053439319134, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008696473552845419, "signal/frontier_coverage_20/centered_abs_mean": 0.08544690757989884, "signal/frontier_coverage_20/group_std_mean": 0.10949314087629318, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015372510813176633, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012218907242640853, "signal/frontier_coverage_25/centered_abs_mean": 0.1205233633518219, "signal/frontier_coverage_25/group_std_mean": 0.15510833263397217, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021653353795409204, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017234840430319308, "signal/frontier_coverage_5/centered_abs_mean": 0.18253884911537172, "signal/frontier_coverage_5/group_std_mean": 0.23624739646911622, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03258874006569386, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026103056035935877, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32768973112106325, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3990603029727936, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4122699022293091, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032768973335623744, "step": 280 }, { "calibration/aurc": 0.16161388497444104, "calibration/batch_distribution_entropy": 0.954786223675052, "calibration/buffer_distribution_entropy": 0.9822368662432682, "calibration/confidence_entropy": 0.4893096755488703, "calibration/coverage@0%": 0.012397262621378627, "calibration/coverage@1%": 0.012397262621378627, "calibration/coverage@10%": 0.3451227750696987, "calibration/coverage@15%": 0.42521536791355874, "calibration/coverage@20%": 0.8219908877178448, "calibration/coverage@25%": 0.9104244296288853, "calibration/coverage@30%": 0.9695187165775401, "calibration/coverage@5%": 0.15790624465730677, "calibration/ece": 0.1771468328440237, "calibration/mean_confidence": 0.6157620534496937, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06805555555555556, "completions/max_length": 4061.2, "completions/max_terminated_length": 4061.2, "completions/mean_length": 1205.688525390625, "completions/mean_terminated_length": 1293.82998046875, "completions/min_length": 0.0, "completions/min_terminated_length": 326.6, "epoch": 0.6839914501068737, "grad_norm": 0.0017848203424364328, "learning_rate": 4.537259615384616e-06, "loss": -0.1574, "num_tokens": 653843328.0, "reward": 0.9423358082771301, "reward_std": 0.1858820378780365, "rewards/accuracy_reward": 0.6730902671813965, "rewards/brier_reward": 0.7743986010551452, "rewards/confidence_uniqueness_reward": 0.881925082206726, "rewards/format_reward": 0.9314236164093017, "rewards/frontier_coverage_0": 0.013644175603985786, "rewards/frontier_coverage_1": 0.013644175603985786, "rewards/frontier_coverage_10": 0.02565064523369074, "rewards/frontier_coverage_15": 0.07387386113405228, "rewards/frontier_coverage_20": 0.14157648533582687, "rewards/frontier_coverage_25": 0.2188367635011673, "rewards/frontier_coverage_5": 0.013734235800802708, "rewards/frontier_entropy_batch_reward": -0.32717219591140745, "signal/accuracy_reward/centered_abs_mean": 0.16214192807674407, "signal/accuracy_reward/group_std_mean": 0.21098495423793792, "signal/accuracy_reward/group_zero_std_frac": 0.4055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9308143734931946, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08107096403837204, "signal/advantage_abs_mean": 0.7548533916473389, "signal/advantage_pre_scale_abs_mean": 0.1412857949733734, "signal/advantage_pre_scale_std": 0.23110412061214447, "signal/advantage_std": 0.9833352923393249, "signal/brier_reward/centered_abs_mean": 0.1556008368730545, "signal/brier_reward/group_std_mean": 0.1984753429889679, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17860327661037445, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015560084208846092, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.09533466547727584, "signal/confidence_uniqueness_reward/group_std_mean": 0.1326120525598526, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.10921618342399597, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.009533467143774033, "signal/format_reward/centered_abs_mean": 0.08937717080116273, "signal/format_reward/group_std_mean": 0.12585532814264297, "signal/format_reward/group_zero_std_frac": 0.6083333373069764, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5117337226867675, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.04468858540058136, "signal/frontier_coverage_0/centered_abs_mean": 0.15532127320766448, "signal/frontier_coverage_0/group_std_mean": 0.20312093198299408, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02545154429972172, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002221094211563468, "signal/frontier_coverage_1/centered_abs_mean": 0.15532127320766448, "signal/frontier_coverage_1/group_std_mean": 0.20312093198299408, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02545154429972172, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002221094211563468, "signal/frontier_coverage_10/centered_abs_mean": 0.08915466368198395, "signal/frontier_coverage_10/group_std_mean": 0.11783302575349808, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014571213349699973, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012749116867780685, "signal/frontier_coverage_15/centered_abs_mean": 0.0656839594244957, "signal/frontier_coverage_15/group_std_mean": 0.08419227302074432, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010779930651187897, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009392806678079069, "signal/frontier_coverage_20/centered_abs_mean": 0.09882727265357971, "signal/frontier_coverage_20/group_std_mean": 0.12712617367506027, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016234659403562546, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014132299926131963, "signal/frontier_coverage_25/centered_abs_mean": 0.14026913046836853, "signal/frontier_coverage_25/group_std_mean": 0.18065418004989625, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02304657958447933, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020058486377820374, "signal/frontier_coverage_5/centered_abs_mean": 0.15472302138805388, "signal/frontier_coverage_5/group_std_mean": 0.2023627281188965, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02535347007215023, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022125390358269216, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3393124520778656, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40504205226898193, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3892597258090973, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0339312456548214, "step": 285 }, { "calibration/aurc": 0.14990839978575438, "calibration/batch_distribution_entropy": 0.9466328987518603, "calibration/buffer_distribution_entropy": 0.9817441485320545, "calibration/confidence_entropy": 0.4870812081068852, "calibration/coverage@0%": 0.022407624505672442, "calibration/coverage@1%": 0.022407624505672442, "calibration/coverage@10%": 0.32707504225792317, "calibration/coverage@15%": 0.5220147394055882, "calibration/coverage@20%": 0.7693151120990234, "calibration/coverage@25%": 0.8949514642116956, "calibration/coverage@30%": 0.9811842627395221, "calibration/coverage@5%": 0.13891238681756574, "calibration/ece": 0.11565863919638524, "calibration/mean_confidence": 0.6153538965509003, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05164930555555556, "completions/max_length": 4078.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 1220.135107421875, "completions/mean_terminated_length": 1287.0528076171875, "completions/min_length": 0.0, "completions/min_terminated_length": 326.6, "epoch": 0.6959913001087487, "grad_norm": 0.0017922352999448776, "learning_rate": 4.507211538461539e-06, "loss": -0.1127, "num_tokens": 670995188.0, "reward": 0.9570554256439209, "reward_std": 0.1720178633928299, "rewards/accuracy_reward": 0.6854166626930237, "rewards/brier_reward": 0.7753046989440918, "rewards/confidence_uniqueness_reward": 0.8973992824554443, "rewards/format_reward": 0.9480034589767456, "rewards/frontier_coverage_0": 0.002874659560620785, "rewards/frontier_coverage_1": 0.002884892001748085, "rewards/frontier_coverage_10": 0.015663625486195088, "rewards/frontier_coverage_15": 0.07120544984936714, "rewards/frontier_coverage_20": 0.13762863576412201, "rewards/frontier_coverage_25": 0.21375060081481934, "rewards/frontier_coverage_5": 0.0030614846386015416, "rewards/frontier_entropy_batch_reward": -0.3331816494464874, "signal/accuracy_reward/centered_abs_mean": 0.1560980886220932, "signal/accuracy_reward/group_std_mean": 0.20737068653106688, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9261637568473816, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0780490443110466, "signal/advantage_abs_mean": 0.7416696906089782, "signal/advantage_pre_scale_abs_mean": 0.12750329077243805, "signal/advantage_pre_scale_std": 0.2124664753675461, "signal/advantage_std": 0.9832707643508911, "signal/brier_reward/centered_abs_mean": 0.14848179519176483, "signal/brier_reward/group_std_mean": 0.19175553023815156, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17806673645973206, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0148481797426939, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07424457222223282, "signal/confidence_uniqueness_reward/group_std_mean": 0.10994532108306884, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08886821419000626, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007424457184970379, "signal/format_reward/centered_abs_mean": 0.06673719584941865, "signal/format_reward/group_std_mean": 0.10154436230659485, "signal/format_reward/group_zero_std_frac": 0.6555555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.39830978512763976, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.03336859792470932, "signal/frontier_coverage_0/centered_abs_mean": 0.1480401337146759, "signal/frontier_coverage_0/group_std_mean": 0.1944763779640198, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.025522398948669433, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021169739309698345, "signal/frontier_coverage_1/centered_abs_mean": 0.1480236232280731, "signal/frontier_coverage_1/group_std_mean": 0.1944561183452606, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.025519952923059464, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021167377941310405, "signal/frontier_coverage_10/centered_abs_mean": 0.08222000598907471, "signal/frontier_coverage_10/group_std_mean": 0.10998225659132004, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014115773141384125, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001175746088847518, "signal/frontier_coverage_15/centered_abs_mean": 0.06811703145503997, "signal/frontier_coverage_15/group_std_mean": 0.08626431375741958, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011764644645154476, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009740736102685332, "signal/frontier_coverage_20/centered_abs_mean": 0.10457015037536621, "signal/frontier_coverage_20/group_std_mean": 0.13225899040699005, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018054082244634628, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014953531557694078, "signal/frontier_coverage_25/centered_abs_mean": 0.14872863590717317, "signal/frontier_coverage_25/group_std_mean": 0.18871353566646576, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025641126558184624, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002126819547265768, "signal/frontier_coverage_5/centered_abs_mean": 0.1474991887807846, "signal/frontier_coverage_5/group_std_mean": 0.19379670023918152, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02542857564985752, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021092384587973355, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33809652328491213, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4022083759307861, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4096614599227905, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03380965441465378, "step": 290 }, { "calibration/aurc": 0.16839003318485468, "calibration/batch_distribution_entropy": 0.9647584219475576, "calibration/buffer_distribution_entropy": 0.9819259989411554, "calibration/confidence_entropy": 0.49521471378087234, "calibration/coverage@0%": 0.031917463514191564, "calibration/coverage@1%": 0.031917463514191564, "calibration/coverage@10%": 0.32524698829330323, "calibration/coverage@15%": 0.4671416904414487, "calibration/coverage@20%": 0.6470332978310416, "calibration/coverage@25%": 0.8226688925601093, "calibration/coverage@30%": 0.8954452772099831, "calibration/coverage@5%": 0.17303270682289534, "calibration/ece": 0.17856220266263753, "calibration/mean_confidence": 0.5398482647393512, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05104166666666667, "completions/max_length": 4080.8, "completions/max_terminated_length": 4080.8, "completions/mean_length": 1236.2116455078126, "completions/mean_terminated_length": 1303.0637939453125, "completions/min_length": 0.0, "completions/min_terminated_length": 344.6, "epoch": 0.7079911501106236, "grad_norm": 0.001917012850753963, "learning_rate": 4.477163461538462e-06, "loss": -0.1196, "num_tokens": 688321818.0, "reward": 0.9414116501808166, "reward_std": 0.1707327514886856, "rewards/accuracy_reward": 0.6397569417953491, "rewards/brier_reward": 0.7684574365615845, "rewards/confidence_uniqueness_reward": 0.9021074414253235, "rewards/format_reward": 0.9486979246139526, "rewards/frontier_coverage_0": 0.029088782146573067, "rewards/frontier_coverage_1": 0.029088782146573067, "rewards/frontier_coverage_10": 0.02980848792940378, "rewards/frontier_coverage_15": 0.06237031891942024, "rewards/frontier_coverage_20": 0.115201236307621, "rewards/frontier_coverage_25": 0.17910198271274566, "rewards/frontier_coverage_5": 0.029053746536374093, "rewards/frontier_entropy_batch_reward": -0.2664636343717575, "signal/accuracy_reward/centered_abs_mean": 0.15848524272441863, "signal/accuracy_reward/group_std_mean": 0.20676854848861695, "signal/accuracy_reward/group_zero_std_frac": 0.4194444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.967148220539093, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07924262136220932, "signal/advantage_abs_mean": 0.7537167191505432, "signal/advantage_pre_scale_abs_mean": 0.12879492938518525, "signal/advantage_pre_scale_std": 0.2127348393201828, "signal/advantage_std": 0.9832599520683288, "signal/brier_reward/centered_abs_mean": 0.1589398592710495, "signal/brier_reward/group_std_mean": 0.199799045920372, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1937939405441284, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015893985889852046, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07883353978395462, "signal/confidence_uniqueness_reward/group_std_mean": 0.11305341869592667, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.09597876071929931, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007883354183286428, "signal/format_reward/centered_abs_mean": 0.07265082374215126, "signal/format_reward/group_std_mean": 0.10599421262741089, "signal/format_reward/group_zero_std_frac": 0.6611111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.441804563999176, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.03632541187107563, "signal/frontier_coverage_0/centered_abs_mean": 0.18514932990074157, "signal/frontier_coverage_0/group_std_mean": 0.23785326182842254, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032265615090727805, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00264763543382287, "signal/frontier_coverage_1/centered_abs_mean": 0.18514932990074157, "signal/frontier_coverage_1/group_std_mean": 0.23785326182842254, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032265615090727805, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00264763543382287, "signal/frontier_coverage_10/centered_abs_mean": 0.101753930747509, "signal/frontier_coverage_10/group_std_mean": 0.13369067162275314, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017728328704833984, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014550811843946575, "signal/frontier_coverage_15/centered_abs_mean": 0.0646564818918705, "signal/frontier_coverage_15/group_std_mean": 0.0819364532828331, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011319943889975547, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009245876688510179, "signal/frontier_coverage_20/centered_abs_mean": 0.09063917696475983, "signal/frontier_coverage_20/group_std_mean": 0.11509868800640106, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015911070629954338, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001296140206977725, "signal/frontier_coverage_25/centered_abs_mean": 0.12784689664840698, "signal/frontier_coverage_25/group_std_mean": 0.1628873258829117, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02245633341372013, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001828210591338575, "signal/frontier_coverage_5/centered_abs_mean": 0.184427148103714, "signal/frontier_coverage_5/group_std_mean": 0.23696613907814026, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03213928528130054, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026373081840574742, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31256569623947145, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3834408223628998, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.38236083984375, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031256569921970366, "step": 295 }, { "calibration/aurc": 0.12078319690376119, "calibration/batch_distribution_entropy": 0.9276962294256655, "calibration/buffer_distribution_entropy": 0.9821949128055089, "calibration/confidence_entropy": 0.4829753078536876, "calibration/coverage@0%": 0.03940227375009563, "calibration/coverage@1%": 0.03940227375009563, "calibration/coverage@10%": 0.42528907795356624, "calibration/coverage@15%": 0.722236752314575, "calibration/coverage@20%": 0.854863809636678, "calibration/coverage@25%": 0.9216402608421896, "calibration/coverage@30%": 0.9679343546671456, "calibration/coverage@5%": 0.27418103847954345, "calibration/ece": 0.11320676820814626, "calibration/mean_confidence": 0.6328128382779166, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.036111111111111115, "completions/max_length": 4062.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 1195.3615478515626, "completions/mean_terminated_length": 1240.4994140625, "completions/min_length": 0.0, "completions/min_terminated_length": 286.8, "epoch": 0.7199910001124986, "grad_norm": 0.0018773460760712624, "learning_rate": 4.447115384615385e-06, "loss": -0.0981, "num_tokens": 705193695.0, "reward": 0.9711301684379577, "reward_std": 0.15473923683166504, "rewards/accuracy_reward": 0.6816840291023254, "rewards/brier_reward": 0.8024370551109314, "rewards/confidence_uniqueness_reward": 0.9131479620933532, "rewards/format_reward": 0.9635416746139527, "rewards/frontier_coverage_0": 0.0319746870547533, "rewards/frontier_coverage_1": 0.0319746870547533, "rewards/frontier_coverage_10": 0.036012591794133186, "rewards/frontier_coverage_15": 0.07915448248386384, "rewards/frontier_coverage_20": 0.1457726925611496, "rewards/frontier_coverage_25": 0.22427182495594025, "rewards/frontier_coverage_5": 0.032045964151620865, "rewards/frontier_entropy_batch_reward": -0.3135247349739075, "signal/accuracy_reward/centered_abs_mean": 0.141259765625, "signal/accuracy_reward/group_std_mean": 0.18782249391078948, "signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9199034810066223, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0706298828125, "signal/advantage_abs_mean": 0.7486230492591858, "signal/advantage_pre_scale_abs_mean": 0.11478613466024398, "signal/advantage_pre_scale_std": 0.19818316996097565, "signal/advantage_std": 0.9831642985343934, "signal/brier_reward/centered_abs_mean": 0.1412952125072479, "signal/brier_reward/group_std_mean": 0.18475831747055055, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1854201763868332, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014129521884024143, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06423628926277161, "signal/confidence_uniqueness_reward/group_std_mean": 0.09473485499620438, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08357910513877868, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006423629261553288, "signal/format_reward/centered_abs_mean": 0.05540364682674408, "signal/format_reward/group_std_mean": 0.08460389599204063, "signal/format_reward/group_zero_std_frac": 0.7111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.3581722557544708, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02770182341337204, "signal/frontier_coverage_0/centered_abs_mean": 0.16017161309719086, "signal/frontier_coverage_0/group_std_mean": 0.21179381310939788, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029991919174790382, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022904541343450545, "signal/frontier_coverage_1/centered_abs_mean": 0.16017161309719086, "signal/frontier_coverage_1/group_std_mean": 0.21179381310939788, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029991919174790382, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022904541343450545, "signal/frontier_coverage_10/centered_abs_mean": 0.08997779488563537, "signal/frontier_coverage_10/group_std_mean": 0.12086162716150284, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016862975619733333, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012866824865341187, "signal/frontier_coverage_15/centered_abs_mean": 0.06877997815608979, "signal/frontier_coverage_15/group_std_mean": 0.08738072514533997, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013019220717251302, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009835536940954626, "signal/frontier_coverage_20/centered_abs_mean": 0.09969817698001862, "signal/frontier_coverage_20/group_std_mean": 0.1269403502345085, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018895361199975015, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014256839640438557, "signal/frontier_coverage_25/centered_abs_mean": 0.13946891725063323, "signal/frontier_coverage_25/group_std_mean": 0.1778358042240143, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026414673030376434, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001994405430741608, "signal/frontier_coverage_5/centered_abs_mean": 0.1596672624349594, "signal/frontier_coverage_5/group_std_mean": 0.21115436553955078, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0298971451818943, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022832419257611037, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34117398262023924, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4078505575656891, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.451895147562027, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03411739841103554, "step": 300 }, { "epoch": 0.7199910001124986, "eval_calibration/aurc": 0.14135630737203364, "eval_calibration/batch_distribution_entropy": 0.8794302039147933, "eval_calibration/buffer_distribution_entropy": 0.9816624202161491, "eval_calibration/confidence_entropy": 0.47957698486476735, "eval_calibration/coverage@0%": 0.26187275985663083, "eval_calibration/coverage@1%": 0.26187275985663083, "eval_calibration/coverage@10%": 0.4733982974910394, "eval_calibration/coverage@15%": 0.5693436379928316, "eval_calibration/coverage@20%": 0.8225312075145222, "eval_calibration/coverage@25%": 0.9220059325176123, "eval_calibration/coverage@30%": 0.9833333333333334, "eval_calibration/coverage@5%": 0.27749775985663083, "eval_calibration/ece": 0.2013177881287851, "eval_calibration/mean_confidence": 0.639016019496972, "eval_completions/clipped_ratio": 0.033854166666666685, "eval_completions/max_length": 3558.6666666666665, "eval_completions/max_terminated_length": 3558.6666666666665, "eval_completions/mean_length": 1123.0069783528645, "eval_completions/mean_terminated_length": 1162.5458374023438, "eval_completions/min_length": 74.16666666666667, "eval_completions/min_terminated_length": 340.0, "eval_loss": 0.0, "eval_num_tokens": 705193695.0, "eval_reward": 0.8927736977736155, "eval_reward_std": 0.290027916431427, "eval_rewards/accuracy_reward": 0.668402781089147, "eval_rewards/brier_reward": 0.7983585397402445, "eval_rewards/confidence_uniqueness_reward": 0.8560836017131805, "eval_rewards/format_reward": 0.9618055621782938, "eval_rewards/frontier_coverage_0": 0.03550082134703795, "eval_rewards/frontier_coverage_1": 0.03550082134703795, "eval_rewards/frontier_coverage_10": 0.036372952007999025, "eval_rewards/frontier_coverage_15": 0.07793260862429936, "eval_rewards/frontier_coverage_20": 0.1448104108373324, "eval_rewards/frontier_coverage_25": 0.22215457757314047, "eval_rewards/frontier_coverage_5": 0.03555072944921752, "eval_rewards/frontier_entropy_batch_reward": -0.9618055621782938, "eval_runtime": 226.3973, "eval_samples_per_second": 4.417, "eval_signal/accuracy_reward/centered_abs_mean": 0.4308810730775197, "eval_signal/accuracy_reward/group_std_mean": 0.4704217165708542, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7596533397833506, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21544053653875986, "eval_signal/advantage_abs_mean": 0.8504889905452728, "eval_signal/advantage_pre_scale_abs_mean": 0.24683604389429092, "eval_signal/advantage_pre_scale_std": 0.28865334888299304, "eval_signal/advantage_std": 0.9864764511585236, "eval_signal/brier_reward/centered_abs_mean": 0.20618696510791779, "eval_signal/brier_reward/group_std_mean": 0.2769654293855031, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07253366460402806, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.020618697938819725, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.08746503914395969, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.1652904860675335, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030453757693370182, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008746504162748655, "eval_signal/format_reward/centered_abs_mean": 0.07161458333333333, "eval_signal/format_reward/group_std_mean": 0.16531085719664892, "eval_signal/format_reward/group_zero_std_frac": 0.22222222636143366, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.12363888944188754, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.035807291666666664, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.21314354240894318, "eval_signal/frontier_coverage_0/group_std_mean": 0.3162529617547989, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01078090537339449, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00304795258368055, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.21314354240894318, "eval_signal/frontier_coverage_1/group_std_mean": 0.3162529617547989, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01078090537339449, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00304795258368055, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.11475640162825584, "eval_signal/frontier_coverage_10/group_std_mean": 0.17651239037513733, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005812383955344558, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016410165311147769, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.10023303826649983, "eval_signal/frontier_coverage_15/group_std_mean": 0.12652035181721052, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005055272563671072, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014333324312853317, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.18804885198672613, "eval_signal/frontier_coverage_20/group_std_mean": 0.23272972305615744, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.00948300507540504, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002689098434833189, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2883477956056595, "eval_signal/frontier_coverage_25/group_std_mean": 0.3521091441313426, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014543836625913778, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004123373539187014, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.21239722271760306, "eval_signal/frontier_coverage_5/group_std_mean": 0.31527013083299, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.010743242222815752, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0030372802478571734, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.07161458333333333, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.16531085719664892, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.22222222636143366, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.024727776025732357, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0071614584885537624, "eval_steps_per_second": 0.027, "step": 300 }, { "epoch": 0.7199910001124986, "step": 300, "train_probe_calibration/aurc": 0.10742759964817182, "train_probe_calibration/batch_distribution_entropy": 0.8882509027982718, "train_probe_calibration/buffer_distribution_entropy": 0.9818274202682408, "train_probe_calibration/confidence_entropy": 0.46353220505448417, "train_probe_calibration/coverage@0%": 0.41935483870967744, "train_probe_calibration/coverage@1%": 0.41935483870967744, "train_probe_calibration/coverage@10%": 0.5690524193548386, "train_probe_calibration/coverage@15%": 0.6693548387096774, "train_probe_calibration/coverage@20%": 0.8776881720430106, "train_probe_calibration/coverage@25%": 0.9307795698924731, "train_probe_calibration/coverage@30%": 0.9734543010752689, "train_probe_calibration/coverage@5%": 0.47278225806451607, "train_probe_calibration/ece": 0.1894395127688172, "train_probe_calibration/mean_confidence": 0.6430341095430108, "train_probe_completions/clipped_ratio": 0.03368055555555555, "train_probe_completions/max_length": 3965.5, "train_probe_completions/max_terminated_length": 3965.5, "train_probe_completions/mean_length": 1158.6050821940105, "train_probe_completions/mean_terminated_length": 1198.6514078776042, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 332.8333333333333, "train_probe_loss": 0.0, "train_probe_num_tokens": 705193695.0, "train_probe_reward": 0.920450339714686, "train_probe_reward_std": 0.2729005167881648, "train_probe_rewards/accuracy_reward": 0.7170138955116272, "train_probe_rewards/brier_reward": 0.8147866725921631, "train_probe_rewards/confidence_uniqueness_reward": 0.855155328909556, "train_probe_rewards/format_reward": 0.9661458333333334, "train_probe_rewards/frontier_coverage_0": 0.017018629354424775, "train_probe_rewards/frontier_coverage_1": 0.017018629354424775, "train_probe_rewards/frontier_coverage_10": 0.026363508426584303, "train_probe_rewards/frontier_coverage_15": 0.08919420217474301, "train_probe_rewards/frontier_coverage_20": 0.1686765750249227, "train_probe_rewards/frontier_coverage_25": 0.2584116756916046, "train_probe_rewards/frontier_coverage_5": 0.01708184430996577, "train_probe_rewards/frontier_entropy_batch_reward": -0.9661458333333334, "train_probe_runtime": 225.113, "train_probe_samples_per_second": 4.442, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3924696197112401, "train_probe_signal/accuracy_reward/group_std_mean": 0.4475194712479909, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7479262252648672, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19623480985562006, "train_probe_signal/advantage_abs_mean": 0.8123607436815897, "train_probe_signal/advantage_pre_scale_abs_mean": 0.22337998201449713, "train_probe_signal/advantage_pre_scale_std": 0.27356437345345813, "train_probe_signal/advantage_std": 0.9864482978979746, "train_probe_signal/brier_reward/centered_abs_mean": 0.1891830414533615, "train_probe_signal/brier_reward/group_std_mean": 0.2585650583108266, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07193443675835927, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.018918303151925404, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0852524774769942, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.15537608787417412, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03227034925172726, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008525247918441892, "train_probe_signal/format_reward/centered_abs_mean": 0.06331380208333333, "train_probe_signal/format_reward/group_std_mean": 0.14678262422482172, "train_probe_signal/format_reward/group_zero_std_frac": 0.305555559694767, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.11950643360614777, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.031656901041666664, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2071551432212194, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.31829215089480084, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.011326478483776251, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002962318443072339, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2071551432212194, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.31829215089480084, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.011326478483776251, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002962318443072339, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.11016559849182765, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.17865454157193503, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006017673372601469, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015753680490888655, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.09809967502951622, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.12173208470145862, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005335134997343023, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014028252529290814, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.18012393762667975, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.2199820727109909, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009804089398433765, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025757723099862537, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.27247088154157, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.33070384462674457, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014833726920187473, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038963335876663527, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2064915026227633, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.31740186115105945, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01129010800893108, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002952828382452329, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.06331380208333333, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.14678262422482172, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.305555559694767, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.023901287155846756, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.006331380379075806, "train_probe_steps_per_second": 0.027 }, { "calibration/aurc": 0.1552747603116913, "calibration/batch_distribution_entropy": 0.9694031593763339, "calibration/buffer_distribution_entropy": 0.981981676252941, "calibration/confidence_entropy": 0.4865835211273901, "calibration/coverage@0%": 0.07719467074264807, "calibration/coverage@1%": 0.08500717074264807, "calibration/coverage@10%": 0.41726822600335883, "calibration/coverage@15%": 0.6069180800905718, "calibration/coverage@20%": 0.7214083784261371, "calibration/coverage@25%": 0.8297344820813896, "calibration/coverage@30%": 0.8809117190489004, "calibration/coverage@5%": 0.2596130214264611, "calibration/ece": 0.12211821587917515, "calibration/mean_confidence": 0.572359487035709, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.039843749999999976, "completions/max_length": 4082.4, "completions/max_terminated_length": 4082.4, "completions/mean_length": 1209.8580322265625, "completions/mean_terminated_length": 1260.188671875, "completions/min_length": 0.0, "completions/min_terminated_length": 287.8, "epoch": 0.7319908501143736, "grad_norm": 0.00178525282535702, "learning_rate": 4.4170673076923085e-06, "loss": -0.0993, "num_tokens": 722246043.0, "reward": 0.9593374133110046, "reward_std": 0.1680666208267212, "rewards/accuracy_reward": 0.6590277671813964, "rewards/brier_reward": 0.7911885142326355, "rewards/confidence_uniqueness_reward": 0.9110708355903625, "rewards/format_reward": 0.9598958253860473, "rewards/frontier_coverage_0": 0.03790535945445299, "rewards/frontier_coverage_1": 0.03790535945445299, "rewards/frontier_coverage_10": 0.03527109958231449, "rewards/frontier_coverage_15": 0.07195997387170791, "rewards/frontier_coverage_20": 0.13255858570337295, "rewards/frontier_coverage_25": 0.20439787209033966, "rewards/frontier_coverage_5": 0.037907212227582934, "rewards/frontier_entropy_batch_reward": -0.28328379392623904, "signal/accuracy_reward/centered_abs_mean": 0.17368706464767455, "signal/accuracy_reward/group_std_mean": 0.2288749635219574, "signal/accuracy_reward/group_zero_std_frac": 0.35277777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9983350157737731, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08684353232383728, "signal/advantage_abs_mean": 0.7509189486503601, "signal/advantage_pre_scale_abs_mean": 0.1267881840467453, "signal/advantage_pre_scale_std": 0.20221469700336456, "signal/advantage_std": 0.983331310749054, "signal/brier_reward/centered_abs_mean": 0.1563771367073059, "signal/brier_reward/group_std_mean": 0.19976865351200104, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17966135144233703, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015637714229524136, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06652242168784142, "signal/confidence_uniqueness_reward/group_std_mean": 0.09470682889223099, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07581292390823365, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006652241852134466, "signal/format_reward/centered_abs_mean": 0.05766059011220932, "signal/format_reward/group_std_mean": 0.08429588973522187, "signal/format_reward/group_zero_std_frac": 0.7277777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.32826632261276245, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02883029505610466, "signal/frontier_coverage_0/centered_abs_mean": 0.17973176538944244, "signal/frontier_coverage_0/group_std_mean": 0.23481388986110688, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029676606878638268, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002570164296776056, "signal/frontier_coverage_1/centered_abs_mean": 0.17973176538944244, "signal/frontier_coverage_1/group_std_mean": 0.23481388986110688, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029676606878638268, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002570164296776056, "signal/frontier_coverage_10/centered_abs_mean": 0.1035152941942215, "signal/frontier_coverage_10/group_std_mean": 0.13700433671474457, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017153031565248965, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014802686870098113, "signal/frontier_coverage_15/centered_abs_mean": 0.07050866931676865, "signal/frontier_coverage_15/group_std_mean": 0.08939850181341172, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011573206260800361, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010082739987410604, "signal/frontier_coverage_20/centered_abs_mean": 0.10300731658935547, "signal/frontier_coverage_20/group_std_mean": 0.13183286190032958, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016876140236854555, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00147300458047539, "signal/frontier_coverage_25/centered_abs_mean": 0.14747728109359742, "signal/frontier_coverage_25/group_std_mean": 0.1892870306968689, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02416311949491501, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021089251386001706, "signal/frontier_coverage_5/centered_abs_mean": 0.17917352616786958, "signal/frontier_coverage_5/group_std_mean": 0.2341161251068115, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02958527356386185, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025621813256293535, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3193290412425995, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38982054591178894, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3665937602519989, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03193290457129479, "step": 305 }, { "calibration/aurc": 0.1301794676312849, "calibration/batch_distribution_entropy": 0.9625952686521343, "calibration/buffer_distribution_entropy": 0.9821861615896477, "calibration/confidence_entropy": 0.49521016438445387, "calibration/coverage@0%": 0.03926732235685257, "calibration/coverage@1%": 0.03926732235685257, "calibration/coverage@10%": 0.5030335020335958, "calibration/coverage@15%": 0.6679785970132144, "calibration/coverage@20%": 0.8117728337153588, "calibration/coverage@25%": 0.896987333105101, "calibration/coverage@30%": 0.9451086956521738, "calibration/coverage@5%": 0.28356458982235416, "calibration/ece": 0.18017739107118977, "calibration/mean_confidence": 0.5452775766771263, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011197916666666651, "completions/max_length": 4044.2, "completions/max_terminated_length": 4044.2, "completions/mean_length": 1101.816162109375, "completions/mean_terminated_length": 1114.2768798828124, "completions/min_length": 0.0, "completions/min_terminated_length": 294.0, "epoch": 0.7439907001162486, "grad_norm": 0.002012076321989298, "learning_rate": 4.3870192307692315e-06, "loss": -0.0239, "num_tokens": 738024053.0, "reward": 0.9944738268852233, "reward_std": 0.131025093793869, "rewards/accuracy_reward": 0.6964409589767456, "rewards/brier_reward": 0.8037665963172913, "rewards/confidence_uniqueness_reward": 0.9392407178878784, "rewards/format_reward": 0.9881944417953491, "rewards/frontier_coverage_0": 0.01375200878828764, "rewards/frontier_coverage_1": 0.01375200878828764, "rewards/frontier_coverage_10": 0.026098747923970222, "rewards/frontier_coverage_15": 0.06501503065228462, "rewards/frontier_coverage_20": 0.12437251508235932, "rewards/frontier_coverage_25": 0.1967363566160202, "rewards/frontier_coverage_5": 0.01382654495537281, "rewards/frontier_entropy_batch_reward": -0.28630446195602416, "signal/accuracy_reward/centered_abs_mean": 0.1478461354970932, "signal/accuracy_reward/group_std_mean": 0.19731209874153138, "signal/accuracy_reward/group_zero_std_frac": 0.42777777910232545, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9674266099929809, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0739230677485466, "signal/advantage_abs_mean": 0.7382626414299012, "signal/advantage_pre_scale_abs_mean": 0.09559480696916581, "signal/advantage_pre_scale_std": 0.1574586659669876, "signal/advantage_std": 0.9831644654273987, "signal/brier_reward/centered_abs_mean": 0.12786187827587128, "signal/brier_reward/group_std_mean": 0.16800876557826996, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16815095245838166, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01278618685901165, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031389427930116655, "signal/confidence_uniqueness_reward/group_std_mean": 0.05517948716878891, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041796249151229856, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0031389428302645685, "signal/format_reward/centered_abs_mean": 0.02082248255610466, "signal/format_reward/group_std_mean": 0.04276073575019836, "signal/format_reward/group_zero_std_frac": 0.8111111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1391352728009224, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01041124127805233, "signal/frontier_coverage_0/centered_abs_mean": 0.17754314541816713, "signal/frontier_coverage_0/group_std_mean": 0.2308153033256531, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033347847312688826, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002538866922259331, "signal/frontier_coverage_1/centered_abs_mean": 0.17754314541816713, "signal/frontier_coverage_1/group_std_mean": 0.2308153033256531, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033347847312688826, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002538866922259331, "signal/frontier_coverage_10/centered_abs_mean": 0.09596252888441086, "signal/frontier_coverage_10/group_std_mean": 0.12769888788461686, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01809079311788082, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013722641393542289, "signal/frontier_coverage_15/centered_abs_mean": 0.06407084167003632, "signal/frontier_coverage_15/group_std_mean": 0.08187362253665924, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012147468142211437, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009162130183540285, "signal/frontier_coverage_20/centered_abs_mean": 0.09221599698066711, "signal/frontier_coverage_20/group_std_mean": 0.11842593848705292, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01745458468794823, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013186887372285128, "signal/frontier_coverage_25/centered_abs_mean": 0.13106826245784758, "signal/frontier_coverage_25/group_std_mean": 0.168903848528862, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024775386229157446, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018742761574685573, "signal/frontier_coverage_5/centered_abs_mean": 0.17703687250614167, "signal/frontier_coverage_5/group_std_mean": 0.23018013834953308, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03325313590466976, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002531627379357815, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3299850106239319, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3956158757209778, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4360787570476532, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03299850150942803, "step": 310 }, { "calibration/aurc": 0.1366955880628234, "calibration/batch_distribution_entropy": 0.9618162442123754, "calibration/buffer_distribution_entropy": 0.9831649324424259, "calibration/confidence_entropy": 0.46992879118155806, "calibration/coverage@0%": 0.03888815794752962, "calibration/coverage@1%": 0.07458369600527241, "calibration/coverage@10%": 0.5571547123475279, "calibration/coverage@15%": 0.6430476811145726, "calibration/coverage@20%": 0.689326318413781, "calibration/coverage@25%": 0.8121137739326814, "calibration/coverage@30%": 0.948553667633651, "calibration/coverage@5%": 0.29792946199505, "calibration/ece": 0.17161861650452742, "calibration/mean_confidence": 0.5649337775994152, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555557, "completions/max_length": 4013.2, "completions/max_terminated_length": 4013.2, "completions/mean_length": 1085.9829833984375, "completions/mean_terminated_length": 1097.5322021484376, "completions/min_length": 0.0, "completions/min_terminated_length": 277.8, "epoch": 0.7559905501181235, "grad_norm": 0.0018735633930191398, "learning_rate": 4.356971153846154e-06, "loss": -0.0221, "num_tokens": 753646737.0, "reward": 0.9998907804489136, "reward_std": 0.12072976231575012, "rewards/accuracy_reward": 0.70703125, "rewards/brier_reward": 0.8256863355636597, "rewards/confidence_uniqueness_reward": 0.9371930837631226, "rewards/format_reward": 0.9894965410232544, "rewards/frontier_coverage_0": 0.031487956270575525, "rewards/frontier_coverage_1": 0.031487956270575525, "rewards/frontier_coverage_10": 0.03752702244091779, "rewards/frontier_coverage_15": 0.0800102636218071, "rewards/frontier_coverage_20": 0.15115441083908082, "rewards/frontier_coverage_25": 0.23473278284072877, "rewards/frontier_coverage_5": 0.03159824721515179, "rewards/frontier_entropy_batch_reward": -0.33212465047836304, "signal/accuracy_reward/centered_abs_mean": 0.1304741770029068, "signal/accuracy_reward/group_std_mean": 0.17876172661781312, "signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9496649146080017, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0652370885014534, "signal/advantage_abs_mean": 0.7483431100845337, "signal/advantage_pre_scale_abs_mean": 0.08890924602746964, "signal/advantage_pre_scale_std": 0.149673992395401, "signal/advantage_std": 0.9830163478851318, "signal/brier_reward/centered_abs_mean": 0.12116939425468445, "signal/brier_reward/group_std_mean": 0.15918799936771394, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17770620584487914, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012116939388215541, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030384134128689767, "signal/confidence_uniqueness_reward/group_std_mean": 0.04616203308105469, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.044592789560556415, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003038413543254137, "signal/format_reward/centered_abs_mean": 0.016954209841787814, "signal/format_reward/group_std_mean": 0.02980217821896076, "signal/format_reward/group_zero_std_frac": 0.8833333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12439082860946656, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008477104920893907, "signal/frontier_coverage_0/centered_abs_mean": 0.1542992562055588, "signal/frontier_coverage_0/group_std_mean": 0.205556982755661, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032318027690052986, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022064793156459928, "signal/frontier_coverage_1/centered_abs_mean": 0.1542992562055588, "signal/frontier_coverage_1/group_std_mean": 0.205556982755661, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032318027690052986, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022064793156459928, "signal/frontier_coverage_10/centered_abs_mean": 0.08488886803388596, "signal/frontier_coverage_10/group_std_mean": 0.11515198647975922, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017752321809530257, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012139108264818787, "signal/frontier_coverage_15/centered_abs_mean": 0.06964522302150726, "signal/frontier_coverage_15/group_std_mean": 0.08731711953878403, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014650024473667145, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009959267335943878, "signal/frontier_coverage_20/centered_abs_mean": 0.10206463634967804, "signal/frontier_coverage_20/group_std_mean": 0.12885676175355912, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021465276554226875, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001459524338133633, "signal/frontier_coverage_25/centered_abs_mean": 0.14236523509025573, "signal/frontier_coverage_25/group_std_mean": 0.1811072200536728, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029910705611109735, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002035822859033942, "signal/frontier_coverage_5/centered_abs_mean": 0.1539049506187439, "signal/frontier_coverage_5/group_std_mean": 0.2050553798675537, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03223489001393318, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022008407860994337, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34531130194664, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4141314446926117, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5073081076145172, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03453113064169884, "step": 315 }, { "calibration/aurc": 0.1532792820022398, "calibration/batch_distribution_entropy": 0.9765106952542615, "calibration/buffer_distribution_entropy": 0.9830529412034188, "calibration/confidence_entropy": 0.48771022732878855, "calibration/coverage@0%": 0.0503317881131927, "calibration/coverage@1%": 0.0503317881131927, "calibration/coverage@10%": 0.36972495473163974, "calibration/coverage@15%": 0.5327097308207017, "calibration/coverage@20%": 0.7291624390348835, "calibration/coverage@25%": 0.8552638694054415, "calibration/coverage@30%": 0.9451060675235279, "calibration/coverage@5%": 0.10596526819151841, "calibration/ece": 0.1575374552154884, "calibration/mean_confidence": 0.5512514130689602, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013454861111111115, "completions/max_length": 4019.6, "completions/max_terminated_length": 4019.6, "completions/mean_length": 1104.9857666015625, "completions/mean_terminated_length": 1119.9865234375, "completions/min_length": 0.0, "completions/min_terminated_length": 297.4, "epoch": 0.7679904001199985, "grad_norm": 0.002138911047950387, "learning_rate": 4.326923076923077e-06, "loss": -0.0288, "num_tokens": 769469421.0, "reward": 0.9860237836837769, "reward_std": 0.12763068974018096, "rewards/accuracy_reward": 0.6813367962837219, "rewards/brier_reward": 0.8128754019737243, "rewards/confidence_uniqueness_reward": 0.9359204173088074, "rewards/format_reward": 0.9860242962837219, "rewards/frontier_coverage_0": 0.03168969838880002, "rewards/frontier_coverage_1": 0.03168969838880002, "rewards/frontier_coverage_10": 0.03532592952251434, "rewards/frontier_coverage_15": 0.07133440673351288, "rewards/frontier_coverage_20": 0.13430507332086564, "rewards/frontier_coverage_25": 0.20982736349105835, "rewards/frontier_coverage_5": 0.03171238908544183, "rewards/frontier_entropy_batch_reward": -0.3034252643585205, "signal/accuracy_reward/centered_abs_mean": 0.13915473371744155, "signal/accuracy_reward/group_std_mean": 0.1901726096868515, "signal/accuracy_reward/group_zero_std_frac": 0.4361111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.949447751045227, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06957736685872078, "signal/advantage_abs_mean": 0.7514520049095154, "signal/advantage_pre_scale_abs_mean": 0.09422855377197266, "signal/advantage_pre_scale_std": 0.1557147890329361, "signal/advantage_std": 0.9831169128417969, "signal/brier_reward/centered_abs_mean": 0.1259630024433136, "signal/brier_reward/group_std_mean": 0.16163697242736816, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1725175768136978, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012596299685537816, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03393393531441689, "signal/confidence_uniqueness_reward/group_std_mean": 0.05093328282237053, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04678136818110943, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033933936385437844, "signal/format_reward/centered_abs_mean": 0.02191297700628638, "signal/format_reward/group_std_mean": 0.03627087995409965, "signal/format_reward/group_zero_std_frac": 0.8666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1513817459344864, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01095648850314319, "signal/frontier_coverage_0/centered_abs_mean": 0.1669444590806961, "signal/frontier_coverage_0/group_std_mean": 0.21839172542095184, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03264324963092804, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023873056750744583, "signal/frontier_coverage_1/centered_abs_mean": 0.1669444590806961, "signal/frontier_coverage_1/group_std_mean": 0.21839172542095184, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03264324963092804, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023873056750744583, "signal/frontier_coverage_10/centered_abs_mean": 0.0943936437368393, "signal/frontier_coverage_10/group_std_mean": 0.1261305809020996, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018443511798977852, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013498291140422226, "signal/frontier_coverage_15/centered_abs_mean": 0.06663856953382492, "signal/frontier_coverage_15/group_std_mean": 0.08290167152881622, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013056344538927078, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009529315866529942, "signal/frontier_coverage_20/centered_abs_mean": 0.09651756435632705, "signal/frontier_coverage_20/group_std_mean": 0.12127373814582824, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018911157548427582, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001380201173014939, "signal/frontier_coverage_25/centered_abs_mean": 0.13597081303596498, "signal/frontier_coverage_25/group_std_mean": 0.17202906012535096, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026631006225943565, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019443826517090202, "signal/frontier_coverage_5/centered_abs_mean": 0.16659797430038453, "signal/frontier_coverage_5/group_std_mean": 0.21796338260173798, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03257517628371716, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002382350992411375, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33233543038368224, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3965053200721741, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45477959513664246, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03323354199528694, "step": 320 }, { "calibration/aurc": 0.15820918899673367, "calibration/batch_distribution_entropy": 0.9448516087091381, "calibration/buffer_distribution_entropy": 0.9834185299134219, "calibration/confidence_entropy": 0.5218543693267863, "calibration/coverage@0%": 0.009955946485114536, "calibration/coverage@1%": 0.009955946485114536, "calibration/coverage@10%": 0.29899489168223903, "calibration/coverage@15%": 0.5591289854743147, "calibration/coverage@20%": 0.8303993936284947, "calibration/coverage@25%": 0.9335853621861032, "calibration/coverage@30%": 0.9718015665796343, "calibration/coverage@5%": 0.0970347483189584, "calibration/ece": 0.17910890874457702, "calibration/mean_confidence": 0.6065439761899268, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008854166666666673, "completions/max_length": 4060.0, "completions/max_terminated_length": 4060.0, "completions/mean_length": 1113.4197265625, "completions/mean_terminated_length": 1123.4944091796874, "completions/min_length": 0.0, "completions/min_terminated_length": 248.2, "epoch": 0.7799902501218735, "grad_norm": 0.001976795494556427, "learning_rate": 4.296875e-06, "loss": -0.0172, "num_tokens": 785408880.0, "reward": 1.0012445449829102, "reward_std": 0.12599269896745682, "rewards/accuracy_reward": 0.7080729246139527, "rewards/brier_reward": 0.8102705597877502, "rewards/confidence_uniqueness_reward": 0.9413081288337708, "rewards/format_reward": 0.9911458373069764, "rewards/frontier_coverage_0": 0.004954400286078453, "rewards/frontier_coverage_1": 0.004954400286078453, "rewards/frontier_coverage_10": 0.01719066435471177, "rewards/frontier_coverage_15": 0.06398663446307182, "rewards/frontier_coverage_20": 0.12680090814828873, "rewards/frontier_coverage_25": 0.20377641618251802, "rewards/frontier_coverage_5": 0.005010297335684299, "rewards/frontier_entropy_batch_reward": -0.2962413549423218, "signal/accuracy_reward/centered_abs_mean": 0.15307616889476777, "signal/accuracy_reward/group_std_mean": 0.19774922728538513, "signal/accuracy_reward/group_zero_std_frac": 0.44444444179534914, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0481387495994567, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07653808444738389, "signal/advantage_abs_mean": 0.7635510325431824, "signal/advantage_pre_scale_abs_mean": 0.09697668254375458, "signal/advantage_pre_scale_std": 0.15258235931396485, "signal/advantage_std": 0.9831050872802735, "signal/brier_reward/centered_abs_mean": 0.12253386676311492, "signal/brier_reward/group_std_mean": 0.15739177763462067, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16855136752128602, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012253387458622455, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026244480162858963, "signal/confidence_uniqueness_reward/group_std_mean": 0.03955877721309662, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03617042452096939, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026244479697197674, "signal/format_reward/centered_abs_mean": 0.013921440858393908, "signal/format_reward/group_std_mean": 0.02432932294905186, "signal/format_reward/group_zero_std_frac": 0.9055555582046508, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09468984603881836, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006960720429196954, "signal/frontier_coverage_0/centered_abs_mean": 0.16732266545295715, "signal/frontier_coverage_0/group_std_mean": 0.21497032344341277, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03277806714177132, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002392714167945087, "signal/frontier_coverage_1/centered_abs_mean": 0.16732266545295715, "signal/frontier_coverage_1/group_std_mean": 0.21497032344341277, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03277806714177132, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002392714167945087, "signal/frontier_coverage_10/centered_abs_mean": 0.09474649280309677, "signal/frontier_coverage_10/group_std_mean": 0.12416742146015167, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01850355453789234, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013548748102039099, "signal/frontier_coverage_15/centered_abs_mean": 0.06477891355752945, "signal/frontier_coverage_15/group_std_mean": 0.08186470121145248, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012831047736108303, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009263384621590376, "signal/frontier_coverage_20/centered_abs_mean": 0.09724516570568084, "signal/frontier_coverage_20/group_std_mean": 0.123662668466568, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019288834184408188, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013906059321016073, "signal/frontier_coverage_25/centered_abs_mean": 0.1411896228790283, "signal/frontier_coverage_25/group_std_mean": 0.17907672822475434, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02798592709004879, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002019011718221009, "signal/frontier_coverage_5/centered_abs_mean": 0.16672602891921998, "signal/frontier_coverage_5/group_std_mean": 0.21423827409744262, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032660551741719245, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023841822519898416, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3291071951389313, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3969504415988922, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4558913826942444, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03291071951389313, "step": 325 }, { "calibration/aurc": 0.156909006067032, "calibration/batch_distribution_entropy": 0.9543305090526145, "calibration/buffer_distribution_entropy": 0.9825246543342956, "calibration/confidence_entropy": 0.5014532464678767, "calibration/coverage@0%": 0.03240064965769619, "calibration/coverage@1%": 0.03240064965769619, "calibration/coverage@10%": 0.3325626616081171, "calibration/coverage@15%": 0.5786603371706475, "calibration/coverage@20%": 0.8025542499093617, "calibration/coverage@25%": 0.9022413212273618, "calibration/coverage@30%": 0.9383812010443865, "calibration/coverage@5%": 0.1835917703498551, "calibration/ece": 0.1576187415510833, "calibration/mean_confidence": 0.60028896248539, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008072916666666652, "completions/max_length": 4059.2, "completions/max_terminated_length": 4059.2, "completions/mean_length": 1062.9158813476563, "completions/mean_terminated_length": 1071.6300903320312, "completions/min_length": 0.0, "completions/min_terminated_length": 268.0, "epoch": 0.7919901001237485, "grad_norm": 0.0017952125053852797, "learning_rate": 4.266826923076923e-06, "loss": -0.0144, "num_tokens": 800758663.0, "reward": 1.0048826813697815, "reward_std": 0.12268615663051605, "rewards/accuracy_reward": 0.7028645753860474, "rewards/brier_reward": 0.8274922251701355, "rewards/confidence_uniqueness_reward": 0.9430109739303589, "rewards/format_reward": 0.9919270753860474, "rewards/frontier_coverage_0": 0.03261248916387558, "rewards/frontier_coverage_1": 0.03261248916387558, "rewards/frontier_coverage_10": 0.04394304975867271, "rewards/frontier_coverage_15": 0.07767296582460403, "rewards/frontier_coverage_20": 0.1443429633975029, "rewards/frontier_coverage_25": 0.22653323411941528, "rewards/frontier_coverage_5": 0.03271240890026093, "rewards/frontier_entropy_batch_reward": -0.2800663381814957, "signal/accuracy_reward/centered_abs_mean": 0.14945203959941863, "signal/accuracy_reward/group_std_mean": 0.19367357790470124, "signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0694059491157533, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07472601979970932, "signal/advantage_abs_mean": 0.7655353307723999, "signal/advantage_pre_scale_abs_mean": 0.0933989018201828, "signal/advantage_pre_scale_std": 0.14949961602687836, "signal/advantage_std": 0.9830429792404175, "signal/brier_reward/centered_abs_mean": 0.11935856491327286, "signal/brier_reward/group_std_mean": 0.15586462020874023, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17148438096046448, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011935856752097606, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02573820427060127, "signal/confidence_uniqueness_reward/group_std_mean": 0.041389158368110655, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.037190504372119904, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002573820622637868, "signal/format_reward/centered_abs_mean": 0.01385091133415699, "signal/format_reward/group_std_mean": 0.026973145455121993, "signal/format_reward/group_zero_std_frac": 0.8861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09901182055473327, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006925455667078495, "signal/frontier_coverage_0/centered_abs_mean": 0.17266753017902375, "signal/frontier_coverage_0/group_std_mean": 0.22434581220149993, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03531235456466675, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024691457394510506, "signal/frontier_coverage_1/centered_abs_mean": 0.17266753017902375, "signal/frontier_coverage_1/group_std_mean": 0.22434581220149993, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03531235456466675, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024691457394510506, "signal/frontier_coverage_10/centered_abs_mean": 0.09966813772916794, "signal/frontier_coverage_10/group_std_mean": 0.13157041370868683, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020390734449028967, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014252542983740567, "signal/frontier_coverage_15/centered_abs_mean": 0.06750372946262359, "signal/frontier_coverage_15/group_std_mean": 0.08493886291980743, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013990617357194424, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009653033805079759, "signal/frontier_coverage_20/centered_abs_mean": 0.09670436531305313, "signal/frontier_coverage_20/group_std_mean": 0.1224692091345787, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02009350135922432, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013828724855557083, "signal/frontier_coverage_25/centered_abs_mean": 0.13782588988542557, "signal/frontier_coverage_25/group_std_mean": 0.1750126987695694, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028620368614792824, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001970910234376788, "signal/frontier_coverage_5/centered_abs_mean": 0.1721877634525299, "signal/frontier_coverage_5/group_std_mean": 0.22375437915325164, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03521372601389885, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024622850120067596, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32352485656738283, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38960899114608766, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46970110535621645, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03235248811542988, "step": 330 }, { "calibration/aurc": 0.25974679160997044, "calibration/batch_distribution_entropy": 0.9760155955492031, "calibration/buffer_distribution_entropy": 0.9828829099511015, "calibration/confidence_entropy": 0.48294149594195723, "calibration/coverage@0%": 0.024704542613912535, "calibration/coverage@1%": 0.024704542613912535, "calibration/coverage@10%": 0.16822392267418446, "calibration/coverage@15%": 0.23947779930752344, "calibration/coverage@20%": 0.30226532800539135, "calibration/coverage@25%": 0.4429456127437903, "calibration/coverage@30%": 0.6829346276603824, "calibration/coverage@5%": 0.07391920229977642, "calibration/ece": 0.17664347223114546, "calibration/mean_confidence": 0.5079064995383955, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008246527777777768, "completions/max_length": 4031.8, "completions/max_terminated_length": 4031.8, "completions/mean_length": 1115.892529296875, "completions/mean_terminated_length": 1125.290966796875, "completions/min_length": 0.0, "completions/min_terminated_length": 305.8, "epoch": 0.8039899501256235, "grad_norm": 0.0018948515644297004, "learning_rate": 4.236778846153847e-06, "loss": -0.0163, "num_tokens": 816738929.0, "reward": 0.9759376645088196, "reward_std": 0.12409429550170899, "rewards/accuracy_reward": 0.6448784708976746, "rewards/brier_reward": 0.8030743598937988, "rewards/confidence_uniqueness_reward": 0.9439346671104432, "rewards/format_reward": 0.9916666507720947, "rewards/frontier_coverage_0": 0.058890349417924884, "rewards/frontier_coverage_1": 0.058890349417924884, "rewards/frontier_coverage_10": 0.05147294811904431, "rewards/frontier_coverage_15": 0.06580731272697449, "rewards/frontier_coverage_20": 0.11382190585136413, "rewards/frontier_coverage_25": 0.17733904123306274, "rewards/frontier_coverage_5": 0.05892799347639084, "rewards/frontier_entropy_batch_reward": -0.25403423607349396, "signal/accuracy_reward/centered_abs_mean": 0.15788303017616273, "signal/accuracy_reward/group_std_mean": 0.20750285685062408, "signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0875071048736573, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07894151508808137, "signal/advantage_abs_mean": 0.7426583290100097, "signal/advantage_pre_scale_abs_mean": 0.09224161058664322, "signal/advantage_pre_scale_std": 0.14779528975486755, "signal/advantage_std": 0.9830945491790771, "signal/brier_reward/centered_abs_mean": 0.13514640033245087, "signal/brier_reward/group_std_mean": 0.17495015859603882, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1872038722038269, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013514639995992183, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024792130663990976, "signal/confidence_uniqueness_reward/group_std_mean": 0.04206196665763855, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03454387187957764, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002479213196784258, "signal/format_reward/centered_abs_mean": 0.01342230886220932, "signal/format_reward/group_std_mean": 0.028304946422576905, "signal/format_reward/group_zero_std_frac": 0.8722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09320384189486504, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00671115443110466, "signal/frontier_coverage_0/centered_abs_mean": 0.20540903508663177, "signal/frontier_coverage_0/group_std_mean": 0.2632716208696365, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04065249636769295, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0029373492114245893, "signal/frontier_coverage_1/centered_abs_mean": 0.20540903508663177, "signal/frontier_coverage_1/group_std_mean": 0.2632716208696365, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04065249636769295, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0029373492114245893, "signal/frontier_coverage_10/centered_abs_mean": 0.11642617136240005, "signal/frontier_coverage_10/group_std_mean": 0.15153419971466064, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02303205505013466, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0016648941906169056, "signal/frontier_coverage_15/centered_abs_mean": 0.06793319284915925, "signal/frontier_coverage_15/group_std_mean": 0.08559278100728988, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01348678469657898, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009714446961879731, "signal/frontier_coverage_20/centered_abs_mean": 0.0874357521533966, "signal/frontier_coverage_20/group_std_mean": 0.11122221052646637, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017344103008508683, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012503312435001134, "signal/frontier_coverage_25/centered_abs_mean": 0.12143271416425705, "signal/frontier_coverage_25/group_std_mean": 0.1560031145811081, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02407735027372837, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017364878440275788, "signal/frontier_coverage_5/centered_abs_mean": 0.20495485663414, "signal/frontier_coverage_5/group_std_mean": 0.26270574927330015, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04056171998381615, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002930854447185993, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3120520055294037, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38307093977928164, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.43333314061164857, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031205202266573907, "step": 335 }, { "calibration/aurc": 0.1777603587436345, "calibration/batch_distribution_entropy": 0.9676360482311539, "calibration/buffer_distribution_entropy": 0.9837100346462169, "calibration/confidence_entropy": 0.48384860471380947, "calibration/coverage@0%": 0.05912567353597281, "calibration/coverage@1%": 0.07817329258359186, "calibration/coverage@10%": 0.3694769900916339, "calibration/coverage@15%": 0.551472741440538, "calibration/coverage@20%": 0.6478955782906988, "calibration/coverage@25%": 0.7625764361126298, "calibration/coverage@30%": 0.8157900272755771, "calibration/coverage@5%": 0.19237191228849984, "calibration/ece": 0.12225110228153846, "calibration/mean_confidence": 0.5703271550941463, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008159722222222231, "completions/max_length": 4018.6, "completions/max_terminated_length": 4018.6, "completions/mean_length": 1021.0284057617188, "completions/mean_terminated_length": 1029.4432495117187, "completions/min_length": 0.0, "completions/min_terminated_length": 259.0, "epoch": 0.8159898001274984, "grad_norm": 0.00172898406162858, "learning_rate": 4.20673076923077e-06, "loss": -0.0258, "num_tokens": 831595448.0, "reward": 0.986263906955719, "reward_std": 0.12218181192874908, "rewards/accuracy_reward": 0.6758680582046509, "rewards/brier_reward": 0.8213988423347474, "rewards/confidence_uniqueness_reward": 0.9391404390335083, "rewards/format_reward": 0.9915798544883728, "rewards/frontier_coverage_0": 0.0459395432844758, "rewards/frontier_coverage_1": 0.0459395432844758, "rewards/frontier_coverage_10": 0.04152496140450239, "rewards/frontier_coverage_15": 0.07311205416917801, "rewards/frontier_coverage_20": 0.13377267271280288, "rewards/frontier_coverage_25": 0.20741928815841676, "rewards/frontier_coverage_5": 0.045927997678518295, "rewards/frontier_entropy_batch_reward": -0.3200296819210052, "signal/accuracy_reward/centered_abs_mean": 0.13541666567325591, "signal/accuracy_reward/group_std_mean": 0.18041078448295594, "signal/accuracy_reward/group_zero_std_frac": 0.4750000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.963217580318451, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06770833283662796, "signal/advantage_abs_mean": 0.7463102102279663, "signal/advantage_pre_scale_abs_mean": 0.09043871462345124, "signal/advantage_pre_scale_std": 0.14913626313209533, "signal/advantage_std": 0.9830648899078369, "signal/brier_reward/centered_abs_mean": 0.12320598512887955, "signal/brier_reward/group_std_mean": 0.16074151992797853, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17543665766716005, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01232059821486473, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02815811224281788, "signal/confidence_uniqueness_reward/group_std_mean": 0.04658014327287674, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04017215184867382, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028158111963421106, "signal/format_reward/centered_abs_mean": 0.01487087681889534, "signal/format_reward/group_std_mean": 0.03009483590722084, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10635680183768273, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00743543840944767, "signal/frontier_coverage_0/centered_abs_mean": 0.15372433960437776, "signal/frontier_coverage_0/group_std_mean": 0.2000808149576187, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03130386024713516, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021982579957693813, "signal/frontier_coverage_1/centered_abs_mean": 0.15372433960437776, "signal/frontier_coverage_1/group_std_mean": 0.2000808149576187, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03130386024713516, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021982579957693813, "signal/frontier_coverage_10/centered_abs_mean": 0.0841196671128273, "signal/frontier_coverage_10/group_std_mean": 0.11117487698793412, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017140276730060577, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012029112316668034, "signal/frontier_coverage_15/centered_abs_mean": 0.06903370916843414, "signal/frontier_coverage_15/group_std_mean": 0.08654794842004776, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01405095923691988, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009871820569969715, "signal/frontier_coverage_20/centered_abs_mean": 0.10134778022766114, "signal/frontier_coverage_20/group_std_mean": 0.12806420773267746, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02063525579869747, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014492732472717762, "signal/frontier_coverage_25/centered_abs_mean": 0.14322546422481536, "signal/frontier_coverage_25/group_std_mean": 0.18165581822395324, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029165779426693916, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020481240935623646, "signal/frontier_coverage_5/centered_abs_mean": 0.15351369380950927, "signal/frontier_coverage_5/group_std_mean": 0.19981254935264586, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03126128278672695, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002195245958864689, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3323663532733917, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3995789408683777, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4719915151596069, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033236635476350786, "step": 340 }, { "calibration/aurc": 0.12287906036682998, "calibration/batch_distribution_entropy": 0.9814748056693892, "calibration/buffer_distribution_entropy": 0.9854357775100036, "calibration/confidence_entropy": 0.5071326681710258, "calibration/coverage@0%": 0.09825680183278489, "calibration/coverage@1%": 0.10783126991789127, "calibration/coverage@10%": 0.487797947842016, "calibration/coverage@15%": 0.687217223910841, "calibration/coverage@20%": 0.7937702159486942, "calibration/coverage@25%": 0.8835173146188409, "calibration/coverage@30%": 0.9581151832460734, "calibration/coverage@5%": 0.2910469628144352, "calibration/ece": 0.23547472343686363, "calibration/mean_confidence": 0.49689369470204864, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.011805555555555536, "completions/max_length": 4073.0, "completions/max_terminated_length": 4073.0, "completions/mean_length": 1078.1122436523438, "completions/mean_terminated_length": 1091.057568359375, "completions/min_length": 0.0, "completions/min_terminated_length": 258.0, "epoch": 0.8279896501293734, "grad_norm": 0.0016360305016860366, "learning_rate": 4.176682692307693e-06, "loss": -0.0312, "num_tokens": 847106533.0, "reward": 0.9822489023208618, "reward_std": 0.12897055447101594, "rewards/accuracy_reward": 0.6663194537162781, "rewards/brier_reward": 0.7981275916099548, "rewards/confidence_uniqueness_reward": 0.9408317327499389, "rewards/format_reward": 0.9881944417953491, "rewards/frontier_coverage_0": 0.029704060405492783, "rewards/frontier_coverage_1": 0.029704060405492783, "rewards/frontier_coverage_10": 0.03771770279854536, "rewards/frontier_coverage_15": 0.06011849418282509, "rewards/frontier_coverage_20": 0.1106926903128624, "rewards/frontier_coverage_25": 0.17512863874435425, "rewards/frontier_coverage_5": 0.029685120284557342, "rewards/frontier_entropy_batch_reward": -0.25664323568344116, "signal/accuracy_reward/centered_abs_mean": 0.1462131053209305, "signal/accuracy_reward/group_std_mean": 0.19600538313388824, "signal/accuracy_reward/group_zero_std_frac": 0.4333333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9670246005058288, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07310655266046524, "signal/advantage_abs_mean": 0.7388134717941284, "signal/advantage_pre_scale_abs_mean": 0.0943774089217186, "signal/advantage_pre_scale_std": 0.15511732697486877, "signal/advantage_std": 0.9831326723098754, "signal/brier_reward/centered_abs_mean": 0.12683559954166412, "signal/brier_reward/group_std_mean": 0.16544119119644166, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1707315742969513, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012683560699224472, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030468913540244103, "signal/confidence_uniqueness_reward/group_std_mean": 0.049927102774381636, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04096878692507744, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003046891512349248, "signal/format_reward/centered_abs_mean": 0.02019314244389534, "signal/format_reward/group_std_mean": 0.03766540549695492, "signal/format_reward/group_zero_std_frac": 0.8444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1350069150328636, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01009657122194767, "signal/frontier_coverage_0/centered_abs_mean": 0.1795959234237671, "signal/frontier_coverage_0/group_std_mean": 0.23311225175857545, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034363172575831415, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002568221651017666, "signal/frontier_coverage_1/centered_abs_mean": 0.1795959234237671, "signal/frontier_coverage_1/group_std_mean": 0.23311225175857545, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034363172575831415, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002568221651017666, "signal/frontier_coverage_10/centered_abs_mean": 0.10517283678054809, "signal/frontier_coverage_10/group_std_mean": 0.1382586717605591, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020169777423143388, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015039715450257062, "signal/frontier_coverage_15/centered_abs_mean": 0.062341035902500154, "signal/frontier_coverage_15/group_std_mean": 0.07913636118173599, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012101586163043975, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0008914768346585334, "signal/frontier_coverage_20/centered_abs_mean": 0.08644651770591735, "signal/frontier_coverage_20/group_std_mean": 0.11047539860010147, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016777468286454676, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001236185198649764, "signal/frontier_coverage_25/centered_abs_mean": 0.12307052761316299, "signal/frontier_coverage_25/group_std_mean": 0.1577708601951599, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023849079757928847, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017599085113033652, "signal/frontier_coverage_5/centered_abs_mean": 0.1793459564447403, "signal/frontier_coverage_5/group_std_mean": 0.23279379010200502, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0343147799372673, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025646470487117766, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3164542317390442, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38684444427490233, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4268608748912811, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03164542391896248, "step": 345 }, { "calibration/aurc": 0.20994078951499792, "calibration/batch_distribution_entropy": 0.9538957528776107, "calibration/buffer_distribution_entropy": 0.9870970000368399, "calibration/confidence_entropy": 0.44064173248762045, "calibration/coverage@0%": 0.01697276681996627, "calibration/coverage@1%": 0.01697276681996627, "calibration/coverage@10%": 0.2595149251508039, "calibration/coverage@15%": 0.41668076479409083, "calibration/coverage@20%": 0.5067220732085298, "calibration/coverage@25%": 0.582640842086777, "calibration/coverage@30%": 0.7986849580843456, "calibration/coverage@5%": 0.076308216555416, "calibration/ece": 0.15867194406599533, "calibration/mean_confidence": 0.5516345775419665, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017100694444444443, "completions/max_length": 3999.0, "completions/max_terminated_length": 3999.0, "completions/mean_length": 1023.1324829101562, "completions/mean_terminated_length": 1040.8033447265625, "completions/min_length": 0.0, "completions/min_terminated_length": 226.6, "epoch": 0.8399895001312484, "grad_norm": 0.0017015208723023534, "learning_rate": 4.146634615384616e-06, "loss": -0.048, "num_tokens": 861985019.0, "reward": 0.9820253729820252, "reward_std": 0.12685408145189286, "rewards/accuracy_reward": 0.6758680701255798, "rewards/brier_reward": 0.8076221346855164, "rewards/confidence_uniqueness_reward": 0.9325710296630859, "rewards/format_reward": 0.9828993082046509, "rewards/frontier_coverage_0": 0.04194375555962324, "rewards/frontier_coverage_1": 0.04194375555962324, "rewards/frontier_coverage_10": 0.04001317657530308, "rewards/frontier_coverage_15": 0.07637237012386322, "rewards/frontier_coverage_20": 0.13732877969741822, "rewards/frontier_coverage_25": 0.21134760677814485, "rewards/frontier_coverage_5": 0.04197418745607138, "rewards/frontier_entropy_batch_reward": -0.298277872800827, "signal/accuracy_reward/centered_abs_mean": 0.13314887136220932, "signal/accuracy_reward/group_std_mean": 0.17562229335308074, "signal/accuracy_reward/group_zero_std_frac": 0.5000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9567326664924621, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06657443568110466, "signal/advantage_abs_mean": 0.7593642354011536, "signal/advantage_pre_scale_abs_mean": 0.09298344552516938, "signal/advantage_pre_scale_std": 0.1581791251897812, "signal/advantage_std": 0.9830424070358277, "signal/brier_reward/centered_abs_mean": 0.13524700105190277, "signal/brier_reward/group_std_mean": 0.1742014318704605, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19523520171642303, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013524701073765754, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.040957468748092654, "signal/confidence_uniqueness_reward/group_std_mean": 0.0662254698574543, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05914860144257546, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004095746856182814, "signal/format_reward/centered_abs_mean": 0.02872721329331398, "signal/format_reward/group_std_mean": 0.05186066627502441, "signal/format_reward/group_zero_std_frac": 0.7972222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.20710389316082, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01436360664665699, "signal/frontier_coverage_0/centered_abs_mean": 0.18308203518390656, "signal/frontier_coverage_0/group_std_mean": 0.23565957844257354, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03774779662489891, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002618073020130396, "signal/frontier_coverage_1/centered_abs_mean": 0.18308203518390656, "signal/frontier_coverage_1/group_std_mean": 0.23565957844257354, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03774779662489891, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002618073020130396, "signal/frontier_coverage_10/centered_abs_mean": 0.10456371903419495, "signal/frontier_coverage_10/group_std_mean": 0.1368136912584305, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.021551913022994994, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0014952612109482288, "signal/frontier_coverage_15/centered_abs_mean": 0.0707410454750061, "signal/frontier_coverage_15/group_std_mean": 0.08790467828512191, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01462008450180292, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010115969693288208, "signal/frontier_coverage_20/centered_abs_mean": 0.09556236267089843, "signal/frontier_coverage_20/group_std_mean": 0.11978346407413483, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019759462401270866, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013665418140590192, "signal/frontier_coverage_25/centered_abs_mean": 0.13080873787403108, "signal/frontier_coverage_25/group_std_mean": 0.16497585475444793, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0270388450473547, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018705649999901652, "signal/frontier_coverage_5/centered_abs_mean": 0.1828676700592041, "signal/frontier_coverage_5/group_std_mean": 0.23539845943450927, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.037703678011894226, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026150076184421776, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3392969012260437, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40456579327583314, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49072799682617185, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033929687738418576, "step": 350 }, { "epoch": 0.8399895001312484, "eval_calibration/aurc": 0.12627949306525968, "eval_calibration/batch_distribution_entropy": 0.9467912754909852, "eval_calibration/buffer_distribution_entropy": 0.9876254154039424, "eval_calibration/confidence_entropy": 0.48752701565040085, "eval_calibration/coverage@0%": 0.322244623655914, "eval_calibration/coverage@1%": 0.322244623655914, "eval_calibration/coverage@10%": 0.5063844086021505, "eval_calibration/coverage@15%": 0.7624327956989246, "eval_calibration/coverage@20%": 0.8098118279569894, "eval_calibration/coverage@25%": 0.8882728494623656, "eval_calibration/coverage@30%": 0.9519489247311829, "eval_calibration/coverage@5%": 0.322244623655914, "eval_calibration/ece": 0.2574667368716044, "eval_calibration/mean_confidence": 0.5234268010988257, "eval_completions/clipped_ratio": 0.01631944444444446, "eval_completions/max_length": 3784.5, "eval_completions/max_terminated_length": 3784.5, "eval_completions/mean_length": 1010.5425516764323, "eval_completions/mean_terminated_length": 1027.3308817545574, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 273.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 861985019.0, "eval_reward": 0.9114697674910227, "eval_reward_std": 0.24290815244118372, "eval_rewards/accuracy_reward": 0.6866319378217062, "eval_rewards/brier_reward": 0.7930839558442434, "eval_rewards/confidence_uniqueness_reward": 0.8857908844947815, "eval_rewards/format_reward": 0.984375, "eval_rewards/frontier_coverage_0": 0.01952519454061985, "eval_rewards/frontier_coverage_1": 0.01952519454061985, "eval_rewards/frontier_coverage_10": 0.0334686745579044, "eval_rewards/frontier_coverage_15": 0.06455122741560142, "eval_rewards/frontier_coverage_20": 0.11642149960001309, "eval_rewards/frontier_coverage_25": 0.18265460431575775, "eval_rewards/frontier_coverage_5": 0.019537934257338446, "eval_rewards/frontier_entropy_batch_reward": -0.984375, "eval_runtime": 213.2572, "eval_samples_per_second": 4.689, "eval_signal/accuracy_reward/centered_abs_mean": 0.4206271717945735, "eval_signal/accuracy_reward/group_std_mean": 0.4648505250612895, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8802718718846639, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21031358589728674, "eval_signal/advantage_abs_mean": 0.8569314777851105, "eval_signal/advantage_pre_scale_abs_mean": 0.20814315478006998, "eval_signal/advantage_pre_scale_std": 0.24182888368765512, "eval_signal/advantage_std": 0.9864102900028229, "eval_signal/brier_reward/centered_abs_mean": 0.19394498566786447, "eval_signal/brier_reward/group_std_mean": 0.24899733563264212, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08124354109168053, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01939449831843376, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.057598222667972244, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09713333596785863, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.023997636511921883, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005759822437539697, "eval_signal/format_reward/centered_abs_mean": 0.029622395678112905, "eval_signal/format_reward/group_std_mean": 0.07291496824473143, "eval_signal/format_reward/group_zero_std_frac": 0.6388889104127884, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.06088021490722895, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.014811197839056453, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.31392258902390796, "eval_signal/frontier_coverage_0/group_std_mean": 0.4262712150812149, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.018851852975785732, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004489093010003368, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.31392258902390796, "eval_signal/frontier_coverage_1/group_std_mean": 0.4262712150812149, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.018851852975785732, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004489093010003368, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.17115350315968195, "eval_signal/frontier_coverage_10/group_std_mean": 0.24345367650190988, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.010294714787354073, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0024474948877468705, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.08757202078898747, "eval_signal/frontier_coverage_15/group_std_mean": 0.10957717150449753, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005260932492092252, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012522799079306424, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.1454414650797844, "eval_signal/frontier_coverage_20/group_std_mean": 0.18414875119924545, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.008730871990943948, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020798128486300507, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.22669106721878052, "eval_signal/frontier_coverage_25/group_std_mean": 0.28089650968710583, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013599606230854988, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003241682231115798, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.31356939673423767, "eval_signal/frontier_coverage_5/group_std_mean": 0.4258475701014201, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.018830711642901104, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004484042447681229, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.029622395678112905, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07291496824473143, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6388889104127884, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012176043431585034, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0029622397075096765, "eval_steps_per_second": 0.028, "step": 350 }, { "epoch": 0.8399895001312484, "step": 350, "train_probe_calibration/aurc": 0.11138316671294553, "train_probe_calibration/batch_distribution_entropy": 0.9328936521878086, "train_probe_calibration/buffer_distribution_entropy": 0.987729842542267, "train_probe_calibration/confidence_entropy": 0.5000671971525904, "train_probe_calibration/coverage@0%": 0.22043010752688172, "train_probe_calibration/coverage@1%": 0.22043010752688172, "train_probe_calibration/coverage@10%": 0.6088709677419355, "train_probe_calibration/coverage@15%": 0.8383736559139785, "train_probe_calibration/coverage@20%": 0.9114583333333334, "train_probe_calibration/coverage@25%": 0.9583333333333334, "train_probe_calibration/coverage@30%": 0.9895833333333334, "train_probe_calibration/coverage@5%": 0.36290322580645157, "train_probe_calibration/ece": 0.28476146068548386, "train_probe_calibration/mean_confidence": 0.5098099438844086, "train_probe_completions/clipped_ratio": 0.014756944444444439, "train_probe_completions/max_length": 3701.6666666666665, "train_probe_completions/max_terminated_length": 3701.6666666666665, "train_probe_completions/mean_length": 989.0814412434896, "train_probe_completions/mean_terminated_length": 1003.9476216634115, "train_probe_completions/min_length": 93.66666666666667, "train_probe_completions/min_terminated_length": 246.66666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 861985019.0, "train_probe_reward": 0.9344007472197214, "train_probe_reward_std": 0.23137953132390976, "train_probe_rewards/accuracy_reward": 0.7387152711550394, "train_probe_rewards/brier_reward": 0.7828520933787028, "train_probe_rewards/confidence_uniqueness_reward": 0.8851476311683655, "train_probe_rewards/format_reward": 0.9843750099341074, "train_probe_rewards/frontier_coverage_0": -0.02593635581433773, "train_probe_rewards/frontier_coverage_1": -0.02593635581433773, "train_probe_rewards/frontier_coverage_10": 0.005999071678767602, "train_probe_rewards/frontier_coverage_15": 0.06303013488650322, "train_probe_rewards/frontier_coverage_20": 0.12391630684336026, "train_probe_rewards/frontier_coverage_25": 0.19901238630215326, "train_probe_rewards/frontier_coverage_5": -0.02588070183992386, "train_probe_rewards/frontier_entropy_batch_reward": -0.9843750099341074, "train_probe_runtime": 207.6057, "train_probe_samples_per_second": 4.817, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3740776975949605, "train_probe_signal/accuracy_reward/group_std_mean": 0.43718187014261883, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.830837219953537, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18703884879748026, "train_probe_signal/advantage_abs_mean": 0.8095610837141672, "train_probe_signal/advantage_pre_scale_abs_mean": 0.18753594905138016, "train_probe_signal/advantage_pre_scale_std": 0.23162239789962769, "train_probe_signal/advantage_std": 0.9863859911759695, "train_probe_signal/brier_reward/centered_abs_mean": 0.20244234800338745, "train_probe_signal/brier_reward/group_std_mean": 0.26038682212432224, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08980598424871762, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.020244235793749493, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05636486907800039, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.09551115706562996, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024982516343394916, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005636486845711867, "train_probe_signal/format_reward/centered_abs_mean": 0.029296875620881718, "train_probe_signal/format_reward/group_std_mean": 0.06911189792056878, "train_probe_signal/format_reward/group_zero_std_frac": 0.6666666865348816, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.06429031708588202, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.014648437810440859, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.3012530356645584, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.42430545886357623, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01914732779065768, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004307918444586297, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3012530356645584, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.42430545886357623, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01914732779065768, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004307918444586297, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.16096202532450357, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.24077540387709936, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.010228140590091547, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0023017569134632745, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.08790385474761327, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.11155568187435468, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005583847174420953, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001257025171071291, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.14556232343117395, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.18222308903932571, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009244945365935564, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020815412087055543, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2230932116508484, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.27396292984485626, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0141687939564387, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003190232984100779, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.30094441771507263, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.42391479512055713, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.019127743629117806, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004303505294956267, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.029296875620881718, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.06911189792056878, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6666666865348816, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.012858063836271564, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.00292968771342809, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.20218502123502455, "calibration/batch_distribution_entropy": 0.9779656063892196, "calibration/buffer_distribution_entropy": 0.9880120078431807, "calibration/confidence_entropy": 0.5058543228286638, "calibration/coverage@0%": 0.018292267448295645, "calibration/coverage@1%": 0.018292267448295645, "calibration/coverage@10%": 0.38723870443511277, "calibration/coverage@15%": 0.47008130188731256, "calibration/coverage@20%": 0.5248855388816154, "calibration/coverage@25%": 0.602678489200584, "calibration/coverage@30%": 0.7387349490230253, "calibration/coverage@5%": 0.13471608262392168, "calibration/ece": 0.15619698133977333, "calibration/mean_confidence": 0.543017593254968, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01953125000000002, "completions/max_length": 4024.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 1027.3447265625, "completions/mean_terminated_length": 1048.2111450195312, "completions/min_length": 0.0, "completions/min_terminated_length": 224.4, "epoch": 0.8519893501331234, "grad_norm": 0.0016697923419997096, "learning_rate": 4.116586538461539e-06, "loss": -0.0577, "num_tokens": 876918622.0, "reward": 0.9857487201690673, "reward_std": 0.1384602814912796, "rewards/accuracy_reward": 0.6854166626930237, "rewards/brier_reward": 0.7945013403892517, "rewards/confidence_uniqueness_reward": 0.9326841950416564, "rewards/format_reward": 0.9798611164093017, "rewards/frontier_coverage_0": 0.014988563163205982, "rewards/frontier_coverage_1": 0.014988563163205982, "rewards/frontier_coverage_10": 0.029185665771365166, "rewards/frontier_coverage_15": 0.06560450792312622, "rewards/frontier_coverage_20": 0.12084827721118926, "rewards/frontier_coverage_25": 0.18861164450645446, "rewards/frontier_coverage_5": 0.015013675601221622, "rewards/frontier_entropy_batch_reward": -0.2603289097547531, "signal/accuracy_reward/centered_abs_mean": 0.1457356780767441, "signal/accuracy_reward/group_std_mean": 0.19056201577186585, "signal/accuracy_reward/group_zero_std_frac": 0.46111111640930175, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9496296286582947, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07286783903837205, "signal/advantage_abs_mean": 0.750654423236847, "signal/advantage_pre_scale_abs_mean": 0.10265205949544906, "signal/advantage_pre_scale_std": 0.1708187222480774, "signal/advantage_std": 0.9831824183464051, "signal/brier_reward/centered_abs_mean": 0.14322828352451325, "signal/brier_reward/group_std_mean": 0.18320123255252838, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1866349160671234, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014322828128933906, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04218879491090775, "signal/confidence_uniqueness_reward/group_std_mean": 0.06747839152812958, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05476883351802826, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004218879528343678, "signal/format_reward/centered_abs_mean": 0.03328993059694767, "signal/format_reward/group_std_mean": 0.057012468576431274, "signal/format_reward/group_zero_std_frac": 0.7833333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21570837497711182, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016644965298473834, "signal/frontier_coverage_0/centered_abs_mean": 0.18814339339733124, "signal/frontier_coverage_0/group_std_mean": 0.24020065665245055, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03511350601911545, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026904504746198656, "signal/frontier_coverage_1/centered_abs_mean": 0.18814339339733124, "signal/frontier_coverage_1/group_std_mean": 0.24020065665245055, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03511350601911545, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026904504746198656, "signal/frontier_coverage_10/centered_abs_mean": 0.11119063049554825, "signal/frontier_coverage_10/group_std_mean": 0.14428375512361527, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020754556357860564, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015900259371846915, "signal/frontier_coverage_15/centered_abs_mean": 0.06850891709327697, "signal/frontier_coverage_15/group_std_mean": 0.08640409409999847, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012759264931082726, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009796775411814451, "signal/frontier_coverage_20/centered_abs_mean": 0.09464813470840454, "signal/frontier_coverage_20/group_std_mean": 0.12077843248844147, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01762023866176605, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013534683734178544, "signal/frontier_coverage_25/centered_abs_mean": 0.1316935181617737, "signal/frontier_coverage_25/group_std_mean": 0.16893566846847535, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024517284706234932, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018832173896953463, "signal/frontier_coverage_5/centered_abs_mean": 0.18792597949504852, "signal/frontier_coverage_5/group_std_mean": 0.23992567658424377, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03507302924990654, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002687341393902898, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3207733452320099, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39081095457077025, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41783658862113954, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03207733556628227, "step": 355 }, { "calibration/aurc": 0.11303180426621968, "calibration/batch_distribution_entropy": 0.9234944837461085, "calibration/buffer_distribution_entropy": 0.9878215954898579, "calibration/confidence_entropy": 0.49597159820928355, "calibration/coverage@0%": 0.03945796090860965, "calibration/coverage@1%": 0.138416294241943, "calibration/coverage@10%": 0.6530974518273713, "calibration/coverage@15%": 0.7242895348491992, "calibration/coverage@20%": 0.772423903710264, "calibration/coverage@25%": 0.8850370047066054, "calibration/coverage@30%": 0.9324324324324325, "calibration/coverage@5%": 0.44373066635011693, "calibration/ece": 0.12756140317825732, "calibration/mean_confidence": 0.643185101661414, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019704861111111093, "completions/max_length": 3997.8, "completions/max_terminated_length": 3997.8, "completions/mean_length": 999.0853271484375, "completions/mean_terminated_length": 1019.1470458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 202.4, "epoch": 0.8639892001349984, "grad_norm": 0.0016646608710289001, "learning_rate": 4.086538461538462e-06, "loss": -0.054, "num_tokens": 891516405.0, "reward": 0.9785197615623474, "reward_std": 0.14136494994163512, "rewards/accuracy_reward": 0.6750868082046508, "rewards/brier_reward": 0.8187860488891602, "rewards/confidence_uniqueness_reward": 0.9283985733985901, "rewards/format_reward": 0.9799479246139526, "rewards/frontier_coverage_0": 0.04092637412250042, "rewards/frontier_coverage_1": 0.04092637412250042, "rewards/frontier_coverage_10": 0.042030976712703706, "rewards/frontier_coverage_15": 0.0748762235045433, "rewards/frontier_coverage_20": 0.13637623935937881, "rewards/frontier_coverage_25": 0.2098323732614517, "rewards/frontier_coverage_5": 0.040917468070983884, "rewards/frontier_entropy_batch_reward": -0.3209426164627075, "signal/accuracy_reward/centered_abs_mean": 0.1537706136703491, "signal/accuracy_reward/group_std_mean": 0.1990972250699997, "signal/accuracy_reward/group_zero_std_frac": 0.44166666865348814, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0244532942771911, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07688530683517455, "signal/advantage_abs_mean": 0.7572015047073364, "signal/advantage_pre_scale_abs_mean": 0.10625196099281312, "signal/advantage_pre_scale_std": 0.17508123219013214, "signal/advantage_std": 0.9831428170204163, "signal/brier_reward/centered_abs_mean": 0.13040018677711487, "signal/brier_reward/group_std_mean": 0.16888347268104553, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17432362139225005, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013040019199252129, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04242881685495377, "signal/confidence_uniqueness_reward/group_std_mean": 0.06739743649959565, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05661019757390022, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00424288185313344, "signal/format_reward/centered_abs_mean": 0.03179796040058136, "signal/format_reward/group_std_mean": 0.054969260841608046, "signal/format_reward/group_zero_std_frac": 0.7861111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21095921397209166, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01589898020029068, "signal/frontier_coverage_0/centered_abs_mean": 0.16192724108695983, "signal/frontier_coverage_0/group_std_mean": 0.2089155375957489, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030903545394539832, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002315559471026063, "signal/frontier_coverage_1/centered_abs_mean": 0.16192724108695983, "signal/frontier_coverage_1/group_std_mean": 0.2089155375957489, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030903545394539832, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002315559471026063, "signal/frontier_coverage_10/centered_abs_mean": 0.09113808274269104, "signal/frontier_coverage_10/group_std_mean": 0.11921153515577317, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017393879033625124, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013032745802775026, "signal/frontier_coverage_15/centered_abs_mean": 0.06823997497558594, "signal/frontier_coverage_15/group_std_mean": 0.08615372478961944, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013093681819736958, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009758316911756993, "signal/frontier_coverage_20/centered_abs_mean": 0.10009044259786606, "signal/frontier_coverage_20/group_std_mean": 0.1269907459616661, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01922752782702446, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014312933897599577, "signal/frontier_coverage_25/centered_abs_mean": 0.14133644700050355, "signal/frontier_coverage_25/group_std_mean": 0.17950156033039094, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02715020589530468, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020211110822856426, "signal/frontier_coverage_5/centered_abs_mean": 0.16168299615383147, "signal/frontier_coverage_5/group_std_mean": 0.2086247146129608, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030856142193078993, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002312066778540611, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32869015336036683, "signal/frontier_entropy_batch_reward/group_std_mean": 0.393494176864624, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4426185369491577, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03286901637911797, "step": 360 }, { "calibration/aurc": 0.10046656835849702, "calibration/batch_distribution_entropy": 0.9755306822700215, "calibration/buffer_distribution_entropy": 0.9876695227054582, "calibration/confidence_entropy": 0.4657519076394254, "calibration/coverage@0%": 0.2736829420052594, "calibration/coverage@1%": 0.3068851405653573, "calibration/coverage@10%": 0.6568735419839828, "calibration/coverage@15%": 0.7042328981637536, "calibration/coverage@20%": 0.753031806065644, "calibration/coverage@25%": 0.851407047876506, "calibration/coverage@30%": 0.8967798974982386, "calibration/coverage@5%": 0.5671302818644433, "calibration/ece": 0.22066731060189587, "calibration/mean_confidence": 0.5431307836996118, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02265624999999998, "completions/max_length": 4056.0, "completions/max_terminated_length": 4056.0, "completions/mean_length": 1015.9993286132812, "completions/mean_terminated_length": 1039.5235473632813, "completions/min_length": 0.0, "completions/min_terminated_length": 187.4, "epoch": 0.8759890501368733, "grad_norm": 0.001823436003178358, "learning_rate": 4.0564903846153846e-06, "loss": -0.0594, "num_tokens": 906318413.0, "reward": 0.9893832921981811, "reward_std": 0.13554426431655883, "rewards/accuracy_reward": 0.7008680462837219, "rewards/brier_reward": 0.7875616431236268, "rewards/confidence_uniqueness_reward": 0.9278796195983887, "rewards/format_reward": 0.9766493082046509, "rewards/frontier_coverage_0": 0.007047331100329757, "rewards/frontier_coverage_1": 0.007047331100329757, "rewards/frontier_coverage_10": 0.025778009090572596, "rewards/frontier_coverage_15": 0.0655012458562851, "rewards/frontier_coverage_20": 0.12075587064027786, "rewards/frontier_coverage_25": 0.18987874686717987, "rewards/frontier_coverage_5": 0.007139163976535201, "rewards/frontier_entropy_batch_reward": -0.2697057068347931, "signal/accuracy_reward/centered_abs_mean": 0.13690320998430253, "signal/accuracy_reward/group_std_mean": 0.18647956252098083, "signal/accuracy_reward/group_zero_std_frac": 0.45000000596046447, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9336233973503113, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06845160499215126, "signal/advantage_abs_mean": 0.7356076240539551, "signal/advantage_pre_scale_abs_mean": 0.0975809395313263, "signal/advantage_pre_scale_std": 0.1690650999546051, "signal/advantage_std": 0.9831165671348572, "signal/brier_reward/centered_abs_mean": 0.1374752402305603, "signal/brier_reward/group_std_mean": 0.17852137982845306, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1881021738052368, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013747524283826352, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04467645138502121, "signal/confidence_uniqueness_reward/group_std_mean": 0.0713021658360958, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.061282969266176227, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004467645334079862, "signal/format_reward/centered_abs_mean": 0.03498806394636631, "signal/format_reward/group_std_mean": 0.05998894199728966, "signal/format_reward/group_zero_std_frac": 0.7750000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.24012745022773743, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017494031973183156, "signal/frontier_coverage_0/centered_abs_mean": 0.18660827577114106, "signal/frontier_coverage_0/group_std_mean": 0.24346633851528168, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03638794124126434, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0026684983633458613, "signal/frontier_coverage_1/centered_abs_mean": 0.18660827577114106, "signal/frontier_coverage_1/group_std_mean": 0.24346633851528168, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03638794124126434, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0026684983633458613, "signal/frontier_coverage_10/centered_abs_mean": 0.1076819583773613, "signal/frontier_coverage_10/group_std_mean": 0.14231389611959458, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020962074026465417, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001539852051064372, "signal/frontier_coverage_15/centered_abs_mean": 0.06423577815294265, "signal/frontier_coverage_15/group_std_mean": 0.08117228299379349, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012597080320119858, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00091857158113271, "signal/frontier_coverage_20/centered_abs_mean": 0.08381548821926117, "signal/frontier_coverage_20/group_std_mean": 0.1063159465789795, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016472844406962395, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0011985614662989974, "signal/frontier_coverage_25/centered_abs_mean": 0.11446720212697983, "signal/frontier_coverage_25/group_std_mean": 0.14590185582637788, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022491169348359107, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0016368810553103685, "signal/frontier_coverage_5/centered_abs_mean": 0.18639478087425232, "signal/frontier_coverage_5/group_std_mean": 0.2431890696287155, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036346501857042315, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002665445441380143, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31751392483711244, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3888779103755951, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4340252041816711, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03175139352679253, "step": 365 }, { "calibration/aurc": 0.11075577590842926, "calibration/batch_distribution_entropy": 0.8718789410455423, "calibration/buffer_distribution_entropy": 0.9877933843635548, "calibration/confidence_entropy": 0.4901921855875632, "calibration/coverage@0%": 0.1376387343102201, "calibration/coverage@1%": 0.19133080350082063, "calibration/coverage@10%": 0.6036193234453195, "calibration/coverage@15%": 0.8161315365363011, "calibration/coverage@20%": 0.8846361185983828, "calibration/coverage@25%": 0.9013477088948786, "calibration/coverage@30%": 0.910512129380054, "calibration/coverage@5%": 0.38877242550776914, "calibration/ece": 0.14343248950202986, "calibration/mean_confidence": 0.6587681828657079, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012586805555555558, "completions/max_length": 4009.4, "completions/max_terminated_length": 4009.4, "completions/mean_length": 944.9483520507813, "completions/mean_terminated_length": 957.1441284179688, "completions/min_length": 0.0, "completions/min_terminated_length": 227.6, "epoch": 0.8879889001387483, "grad_norm": 0.001997613813728094, "learning_rate": 4.026442307692308e-06, "loss": -0.0314, "num_tokens": 920275482.0, "reward": 1.0175946712493897, "reward_std": 0.11938634067773819, "rewards/accuracy_reward": 0.7550347208976745, "rewards/brier_reward": 0.8396147847175598, "rewards/confidence_uniqueness_reward": 0.929998254776001, "rewards/format_reward": 0.9874131798744201, "rewards/frontier_coverage_0": 0.008122816309332847, "rewards/frontier_coverage_1": 0.008122816309332847, "rewards/frontier_coverage_10": 0.028084695525467395, "rewards/frontier_coverage_15": 0.09159668684005737, "rewards/frontier_coverage_20": 0.17193578481674193, "rewards/frontier_coverage_25": 0.2666388005018234, "rewards/frontier_coverage_5": 0.008194121345877648, "rewards/frontier_entropy_batch_reward": -0.3892317533493042, "signal/accuracy_reward/centered_abs_mean": 0.12407768964767456, "signal/accuracy_reward/group_std_mean": 0.16893844306468964, "signal/accuracy_reward/group_zero_std_frac": 0.5000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9724384427070618, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06203884482383728, "signal/advantage_abs_mean": 0.7490497827529907, "signal/advantage_pre_scale_abs_mean": 0.08648647367954254, "signal/advantage_pre_scale_std": 0.1495155483484268, "signal/advantage_std": 0.9829149723052979, "signal/brier_reward/centered_abs_mean": 0.11033552289009094, "signal/brier_reward/group_std_mean": 0.1457503229379654, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17304804623126985, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011033552512526513, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034914476424455644, "signal/confidence_uniqueness_reward/group_std_mean": 0.05499633625149727, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05407209992408753, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034914476331323386, "signal/format_reward/centered_abs_mean": 0.02003580741584301, "signal/format_reward/group_std_mean": 0.037604504451155665, "signal/format_reward/group_zero_std_frac": 0.8472222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1530803084373474, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010017903707921504, "signal/frontier_coverage_0/centered_abs_mean": 0.13898501694202423, "signal/frontier_coverage_0/group_std_mean": 0.17986855208873748, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031086085736751555, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019874857971444726, "signal/frontier_coverage_1/centered_abs_mean": 0.13898501694202423, "signal/frontier_coverage_1/group_std_mean": 0.17986855208873748, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031086085736751555, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019874857971444726, "signal/frontier_coverage_10/centered_abs_mean": 0.07780203223228455, "signal/frontier_coverage_10/group_std_mean": 0.10243205726146698, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017435486987233163, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001112569053657353, "signal/frontier_coverage_15/centered_abs_mean": 0.06910406351089478, "signal/frontier_coverage_15/group_std_mean": 0.08591423332691192, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015592486225068569, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009881880832836032, "signal/frontier_coverage_20/centered_abs_mean": 0.10108875632286071, "signal/frontier_coverage_20/group_std_mean": 0.12666354775428773, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022827718779444693, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014455692144110799, "signal/frontier_coverage_25/centered_abs_mean": 0.1385332614183426, "signal/frontier_coverage_25/group_std_mean": 0.17508584558963775, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03128995075821876, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019810255384072662, "signal/frontier_coverage_5/centered_abs_mean": 0.13876722007989883, "signal/frontier_coverage_5/group_std_mean": 0.1796049416065216, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031037060543894768, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001984371221624315, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34756895899772644, "signal/frontier_entropy_batch_reward/group_std_mean": 0.408492773771286, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.54748575091362, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03475689888000488, "step": 370 }, { "calibration/aurc": 0.16836747731521123, "calibration/batch_distribution_entropy": 0.9409667328624938, "calibration/buffer_distribution_entropy": 0.9858811949340381, "calibration/confidence_entropy": 0.5013783566039679, "calibration/coverage@0%": 0.01783872765233107, "calibration/coverage@1%": 0.01783872765233107, "calibration/coverage@10%": 0.24995211403058035, "calibration/coverage@15%": 0.5976058346179306, "calibration/coverage@20%": 0.7031263919516597, "calibration/coverage@25%": 0.8019207635514969, "calibration/coverage@30%": 0.8845246697506749, "calibration/coverage@5%": 0.09129267502075213, "calibration/ece": 0.16091990937012796, "calibration/mean_confidence": 0.6130512914764606, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007378472222222187, "completions/max_length": 3958.2, "completions/max_terminated_length": 3958.2, "completions/mean_length": 985.8460327148438, "completions/mean_terminated_length": 992.9516845703125, "completions/min_length": 0.0, "completions/min_terminated_length": 232.8, "epoch": 0.8999887501406233, "grad_norm": 0.0019466127268970013, "learning_rate": 3.996394230769231e-06, "loss": -0.0099, "num_tokens": 934735820.0, "reward": 0.9989883065223694, "reward_std": 0.11320204883813859, "rewards/accuracy_reward": 0.6914930582046509, "rewards/brier_reward": 0.8222344160079956, "rewards/confidence_uniqueness_reward": 0.9439424037933349, "rewards/format_reward": 0.9927951335906983, "rewards/frontier_coverage_0": 0.03563723305705935, "rewards/frontier_coverage_1": 0.035642618965357545, "rewards/frontier_coverage_10": 0.044866102561354634, "rewards/frontier_coverage_15": 0.07400252968072892, "rewards/frontier_coverage_20": 0.13133004158735276, "rewards/frontier_coverage_25": 0.20458360016345978, "rewards/frontier_coverage_5": 0.03570191371254623, "rewards/frontier_entropy_batch_reward": -0.2780673325061798, "signal/accuracy_reward/centered_abs_mean": 0.13141276091337203, "signal/accuracy_reward/group_std_mean": 0.1752326160669327, "signal/accuracy_reward/group_zero_std_frac": 0.48888888359069826, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9961167454719544, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06570638045668602, "signal/advantage_abs_mean": 0.7583129167556762, "signal/advantage_pre_scale_abs_mean": 0.08607572019100189, "signal/advantage_pre_scale_std": 0.13993633836507796, "signal/advantage_std": 0.9829720020294189, "signal/brier_reward/centered_abs_mean": 0.11517563909292221, "signal/brier_reward/group_std_mean": 0.14917479753494262, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17474163174629212, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01151756402105093, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023243167623877525, "signal/confidence_uniqueness_reward/group_std_mean": 0.036514821276068685, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03521691001951695, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023243167670443654, "signal/format_reward/centered_abs_mean": 0.011572265811264516, "signal/format_reward/group_std_mean": 0.022318005003035067, "signal/format_reward/group_zero_std_frac": 0.9027777791023255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08761402815580369, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005786132905632258, "signal/frontier_coverage_0/centered_abs_mean": 0.1577325791120529, "signal/frontier_coverage_0/group_std_mean": 0.20713994801044464, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03425569087266922, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022555758245289327, "signal/frontier_coverage_1/centered_abs_mean": 0.15770569443702698, "signal/frontier_coverage_1/group_std_mean": 0.20710653066635132, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034249893575906756, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002255191374570131, "signal/frontier_coverage_10/centered_abs_mean": 0.08623393028974533, "signal/frontier_coverage_10/group_std_mean": 0.11464256942272186, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01874492093920708, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012331451987847685, "signal/frontier_coverage_15/centered_abs_mean": 0.0666267767548561, "signal/frontier_coverage_15/group_std_mean": 0.08374895453453064, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014445245079696179, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009527628892101348, "signal/frontier_coverage_20/centered_abs_mean": 0.09474294930696488, "signal/frontier_coverage_20/group_std_mean": 0.11888675689697266, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020516883209347726, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001354824099689722, "signal/frontier_coverage_25/centered_abs_mean": 0.13197840452194215, "signal/frontier_coverage_25/group_std_mean": 0.16576823592185974, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028572235628962515, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001887291227467358, "signal/frontier_coverage_5/centered_abs_mean": 0.1573409467935562, "signal/frontier_coverage_5/group_std_mean": 0.20665175020694732, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034170858934521674, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002249975502490997, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32009653449058534, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3921052277088165, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48509649038314817, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032009654119610785, "step": 375 }, { "calibration/aurc": 0.16092519711536962, "calibration/batch_distribution_entropy": 0.9423709566617757, "calibration/buffer_distribution_entropy": 0.9849338657064634, "calibration/confidence_entropy": 0.48836660732999615, "calibration/coverage@0%": 0.07674090848546708, "calibration/coverage@1%": 0.09743056365788086, "calibration/coverage@10%": 0.3124138291737335, "calibration/coverage@15%": 0.5342731730034984, "calibration/coverage@20%": 0.6844617418251927, "calibration/coverage@25%": 0.7673174485999794, "calibration/coverage@30%": 0.9796584880636605, "calibration/coverage@5%": 0.2446315302305752, "calibration/ece": 0.16269622681227508, "calibration/mean_confidence": 0.6199632640210615, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007204861111111116, "completions/max_length": 3954.0, "completions/max_terminated_length": 3954.0, "completions/mean_length": 975.0330810546875, "completions/mean_terminated_length": 982.1337524414063, "completions/min_length": 0.0, "completions/min_terminated_length": 201.8, "epoch": 0.9119886001424983, "grad_norm": 0.0019179882947355509, "learning_rate": 3.966346153846154e-06, "loss": -0.0094, "num_tokens": 949099241.0, "reward": 1.011078429222107, "reward_std": 0.11091785579919815, "rewards/accuracy_reward": 0.7229166746139526, "rewards/brier_reward": 0.8134139537811279, "rewards/confidence_uniqueness_reward": 0.9428596019744873, "rewards/format_reward": 0.9926215171813965, "rewards/frontier_coverage_0": 0.004921641945838928, "rewards/frontier_coverage_1": 0.004925927333533764, "rewards/frontier_coverage_10": 0.01868428089655936, "rewards/frontier_coverage_15": 0.07345463410019874, "rewards/frontier_coverage_20": 0.13654196113348008, "rewards/frontier_coverage_25": 0.2155262529850006, "rewards/frontier_coverage_5": 0.0050234109163284305, "rewards/frontier_entropy_batch_reward": -0.28882819712162017, "signal/accuracy_reward/centered_abs_mean": 0.12810329645872115, "signal/accuracy_reward/group_std_mean": 0.17452663481235503, "signal/accuracy_reward/group_zero_std_frac": 0.4805555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9722905874252319, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06405164822936057, "signal/advantage_abs_mean": 0.754323148727417, "signal/advantage_pre_scale_abs_mean": 0.08285565674304962, "signal/advantage_pre_scale_std": 0.13604794144630433, "signal/advantage_std": 0.9829652667045593, "signal/brier_reward/centered_abs_mean": 0.11799997389316559, "signal/brier_reward/group_std_mean": 0.15085006952285768, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17907191216945648, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01179999802261591, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024614708870649336, "signal/confidence_uniqueness_reward/group_std_mean": 0.03595021180808544, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03735269904136658, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002461470989510417, "signal/format_reward/centered_abs_mean": 0.011593967117369175, "signal/format_reward/group_std_mean": 0.019906727969646452, "signal/format_reward/group_zero_std_frac": 0.9222222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08723903000354767, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005796983558684588, "signal/frontier_coverage_0/centered_abs_mean": 0.16741606891155242, "signal/frontier_coverage_0/group_std_mean": 0.21475327610969544, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036335456371307376, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023940497543662787, "signal/frontier_coverage_1/centered_abs_mean": 0.16740790605545045, "signal/frontier_coverage_1/group_std_mean": 0.21474320888519288, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03633376285433769, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023939330130815506, "signal/frontier_coverage_10/centered_abs_mean": 0.09415251165628433, "signal/frontier_coverage_10/group_std_mean": 0.12194554954767227, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.020430530607700347, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001346380915492773, "signal/frontier_coverage_15/centered_abs_mean": 0.06532250344753265, "signal/frontier_coverage_15/group_std_mean": 0.08159894198179245, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014241785556077958, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009341117809526622, "signal/frontier_coverage_20/centered_abs_mean": 0.08967762291431428, "signal/frontier_coverage_20/group_std_mean": 0.11331800818443298, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01956898979842663, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001282389950938523, "signal/frontier_coverage_25/centered_abs_mean": 0.12472088485956193, "signal/frontier_coverage_25/group_std_mean": 0.15883929431438445, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027211637794971467, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017835085978731512, "signal/frontier_coverage_5/centered_abs_mean": 0.16699602901935579, "signal/frontier_coverage_5/group_std_mean": 0.21422846913337706, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03624384626746178, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002388043189421296, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3240382134914398, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39173341989517213, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49400672912597654, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032403822615742685, "step": 380 }, { "calibration/aurc": 0.19064716793554867, "calibration/batch_distribution_entropy": 0.9369254339434014, "calibration/buffer_distribution_entropy": 0.9836490401269389, "calibration/confidence_entropy": 0.495918887964904, "calibration/coverage@0%": 0.02263309099950029, "calibration/coverage@1%": 0.02263309099950029, "calibration/coverage@10%": 0.31466428353582415, "calibration/coverage@15%": 0.4292183774827497, "calibration/coverage@20%": 0.6041198609363091, "calibration/coverage@25%": 0.7517649378676992, "calibration/coverage@30%": 0.8218858491876129, "calibration/coverage@5%": 0.16429975766616695, "calibration/ece": 0.1446770208112851, "calibration/mean_confidence": 0.5884134436310122, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008680555555555535, "completions/max_length": 3991.6, "completions/max_terminated_length": 3991.6, "completions/mean_length": 1074.7923828125, "completions/mean_terminated_length": 1084.2991943359375, "completions/min_length": 0.0, "completions/min_terminated_length": 211.0, "epoch": 0.9239884501443731, "grad_norm": 0.0019680929835885763, "learning_rate": 3.936298076923077e-06, "loss": -0.0255, "num_tokens": 964584497.0, "reward": 0.9990208506584167, "reward_std": 0.11245020627975463, "rewards/accuracy_reward": 0.7045138835906982, "rewards/brier_reward": 0.8249103784561157, "rewards/confidence_uniqueness_reward": 0.9390157103538513, "rewards/format_reward": 0.9913194298744201, "rewards/frontier_coverage_0": 0.02678363719023764, "rewards/frontier_coverage_1": 0.02678363719023764, "rewards/frontier_coverage_10": 0.033601064234972, "rewards/frontier_coverage_15": 0.07888007164001465, "rewards/frontier_coverage_20": 0.14335883557796478, "rewards/frontier_coverage_25": 0.22403789162635804, "rewards/frontier_coverage_5": 0.026947683235630394, "rewards/frontier_entropy_batch_reward": -0.333020281791687, "signal/accuracy_reward/centered_abs_mean": 0.11980251967906952, "signal/accuracy_reward/group_std_mean": 0.16275928020477295, "signal/accuracy_reward/group_zero_std_frac": 0.5194444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9279258489608765, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05990125983953476, "signal/advantage_abs_mean": 0.7537263393402099, "signal/advantage_pre_scale_abs_mean": 0.08232245296239853, "signal/advantage_pre_scale_std": 0.14088055938482286, "signal/advantage_std": 0.9829261660575866, "signal/brier_reward/centered_abs_mean": 0.11604090929031372, "signal/brier_reward/group_std_mean": 0.1511075794696808, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1813347041606903, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01160409115254879, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02827602457255125, "signal/confidence_uniqueness_reward/group_std_mean": 0.046187874674797055, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.043776792287826535, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002827602461911738, "signal/format_reward/centered_abs_mean": 0.01551649336470291, "signal/format_reward/group_std_mean": 0.03089729677885771, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11859939582645893, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007758246682351455, "signal/frontier_coverage_0/centered_abs_mean": 0.1470422476530075, "signal/frontier_coverage_0/group_std_mean": 0.193260794878006, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032766058668494226, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021027040667831896, "signal/frontier_coverage_1/centered_abs_mean": 0.1470422476530075, "signal/frontier_coverage_1/group_std_mean": 0.193260794878006, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032766058668494226, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021027040667831896, "signal/frontier_coverage_10/centered_abs_mean": 0.08149942010641098, "signal/frontier_coverage_10/group_std_mean": 0.10828516483306885, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018163814209401608, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001165441726334393, "signal/frontier_coverage_15/centered_abs_mean": 0.06997437477111816, "signal/frontier_coverage_15/group_std_mean": 0.08664604872465134, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015636095218360423, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010006335913203656, "signal/frontier_coverage_20/centered_abs_mean": 0.10055458694696426, "signal/frontier_coverage_20/group_std_mean": 0.12494523078203201, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022478773444890975, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014379305765032768, "signal/frontier_coverage_25/centered_abs_mean": 0.1401418536901474, "signal/frontier_coverage_25/group_std_mean": 0.17512567937374116, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031318724155426025, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020040284842252733, "signal/frontier_coverage_5/centered_abs_mean": 0.14670295566320418, "signal/frontier_coverage_5/group_std_mean": 0.1928351879119873, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032691262662410736, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020978521322831513, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.338723349571228, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4019467055797577, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5293410301208497, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03387233465909958, "step": 385 }, { "calibration/aurc": 0.131628539126266, "calibration/batch_distribution_entropy": 0.9687453953559209, "calibration/buffer_distribution_entropy": 0.984400946136547, "calibration/confidence_entropy": 0.4489368869309242, "calibration/coverage@0%": 0.10330473885184659, "calibration/coverage@1%": 0.10330473885184659, "calibration/coverage@10%": 0.49364135574786017, "calibration/coverage@15%": 0.5628806538752541, "calibration/coverage@20%": 0.7786696103466213, "calibration/coverage@25%": 0.8854545709793351, "calibration/coverage@30%": 0.9405983265947888, "calibration/coverage@5%": 0.30226593902208354, "calibration/ece": 0.2354010264798198, "calibration/mean_confidence": 0.4826527057723909, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007552083333333348, "completions/max_length": 4029.8, "completions/max_terminated_length": 4029.8, "completions/mean_length": 1078.279345703125, "completions/mean_terminated_length": 1086.5503173828124, "completions/min_length": 0.0, "completions/min_terminated_length": 222.4, "epoch": 0.9359883001462481, "grad_norm": 0.001781121944077313, "learning_rate": 3.90625e-06, "loss": -0.0095, "num_tokens": 980115523.0, "reward": 1.0005874991416932, "reward_std": 0.10873806923627853, "rewards/accuracy_reward": 0.6994791626930237, "rewards/brier_reward": 0.8032535314559937, "rewards/confidence_uniqueness_reward": 0.9435503482818604, "rewards/format_reward": 0.9924479126930237, "rewards/frontier_coverage_0": 0.024669825052842497, "rewards/frontier_coverage_1": 0.024669825052842497, "rewards/frontier_coverage_10": 0.03370050191879272, "rewards/frontier_coverage_15": 0.07053077518939972, "rewards/frontier_coverage_20": 0.12623442858457565, "rewards/frontier_coverage_25": 0.20001912415027617, "rewards/frontier_coverage_5": 0.024624837329611182, "rewards/frontier_entropy_batch_reward": -0.27270071804523466, "signal/accuracy_reward/centered_abs_mean": 0.1298828125, "signal/accuracy_reward/group_std_mean": 0.17255926728248597, "signal/accuracy_reward/group_zero_std_frac": 0.5083333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0192892909049989, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06494140625, "signal/advantage_abs_mean": 0.7568698048591613, "signal/advantage_pre_scale_abs_mean": 0.08172477781772614, "signal/advantage_pre_scale_std": 0.1360209256410599, "signal/advantage_std": 0.9829101800918579, "signal/brier_reward/centered_abs_mean": 0.1270618975162506, "signal/brier_reward/group_std_mean": 0.16338178813457488, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20035703480243683, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012706190161406995, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02496674135327339, "signal/confidence_uniqueness_reward/group_std_mean": 0.038834089413285255, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03995698355138302, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024966741679236294, "signal/format_reward/centered_abs_mean": 0.013047960214316845, "signal/format_reward/group_std_mean": 0.02432667538523674, "signal/format_reward/group_zero_std_frac": 0.9027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.105986687541008, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006523980107158422, "signal/frontier_coverage_0/centered_abs_mean": 0.19202699661254882, "signal/frontier_coverage_0/group_std_mean": 0.2493561327457428, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04318622797727585, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027459860779345036, "signal/frontier_coverage_1/centered_abs_mean": 0.19202699661254882, "signal/frontier_coverage_1/group_std_mean": 0.2493561327457428, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04318622797727585, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027459860779345036, "signal/frontier_coverage_10/centered_abs_mean": 0.09750582873821259, "signal/frontier_coverage_10/group_std_mean": 0.12793900519609452, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.021928153187036514, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013943333411589264, "signal/frontier_coverage_15/centered_abs_mean": 0.06799988895654678, "signal/frontier_coverage_15/group_std_mean": 0.0844225361943245, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015377411991357804, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.000972398417070508, "signal/frontier_coverage_20/centered_abs_mean": 0.08751165270805358, "signal/frontier_coverage_20/group_std_mean": 0.10887247174978257, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01980235055088997, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012514166766777635, "signal/frontier_coverage_25/centered_abs_mean": 0.1190670147538185, "signal/frontier_coverage_25/group_std_mean": 0.14888398349285126, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026936568692326544, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017026583664119244, "signal/frontier_coverage_5/centered_abs_mean": 0.19167168736457824, "signal/frontier_coverage_5/group_std_mean": 0.24889355897903442, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.043106149137020114, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002740905107930303, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31426780223846434, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38453606367111204, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.496795254945755, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03142678178846836, "step": 390 }, { "calibration/aurc": 0.12006739177635935, "calibration/batch_distribution_entropy": 0.9621003417898498, "calibration/buffer_distribution_entropy": 0.9850575871337772, "calibration/confidence_entropy": 0.4640747669149102, "calibration/coverage@0%": 0.12258103953836168, "calibration/coverage@1%": 0.1806893877947558, "calibration/coverage@10%": 0.5286890507883714, "calibration/coverage@15%": 0.6967522164070671, "calibration/coverage@20%": 0.7996866238435315, "calibration/coverage@25%": 0.8778432408702359, "calibration/coverage@30%": 0.9411440961700013, "calibration/coverage@5%": 0.3512544272331463, "calibration/ece": 0.13712931379974697, "calibration/mean_confidence": 0.5683317673173456, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012239583333333326, "completions/max_length": 4045.0, "completions/max_terminated_length": 4045.0, "completions/mean_length": 1162.4337890625, "completions/mean_terminated_length": 1176.98583984375, "completions/min_length": 0.0, "completions/min_terminated_length": 252.0, "epoch": 0.9479881501481231, "grad_norm": 0.0016314678359776735, "learning_rate": 3.876201923076923e-06, "loss": -0.0318, "num_tokens": 996640808.0, "reward": 0.9922902107238769, "reward_std": 0.11627082526683807, "rewards/accuracy_reward": 0.6799479246139526, "rewards/brier_reward": 0.8181400537490845, "rewards/confidence_uniqueness_reward": 0.9398838996887207, "rewards/format_reward": 0.9876736044883728, "rewards/frontier_coverage_0": 0.04872595062479377, "rewards/frontier_coverage_1": 0.04872595062479377, "rewards/frontier_coverage_10": 0.04636274129152298, "rewards/frontier_coverage_15": 0.07839905470609665, "rewards/frontier_coverage_20": 0.13514964878559113, "rewards/frontier_coverage_25": 0.20990723073482515, "rewards/frontier_coverage_5": 0.04886599145829677, "rewards/frontier_entropy_batch_reward": -0.2613369792699814, "signal/accuracy_reward/centered_abs_mean": 0.12535264641046523, "signal/accuracy_reward/group_std_mean": 0.16798610091209412, "signal/accuracy_reward/group_zero_std_frac": 0.5083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9664302825927734, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06267632320523261, "signal/advantage_abs_mean": 0.7464218854904174, "signal/advantage_pre_scale_abs_mean": 0.08505754321813583, "signal/advantage_pre_scale_std": 0.14605462849140166, "signal/advantage_std": 0.9829274535179138, "signal/brier_reward/centered_abs_mean": 0.12280103266239166, "signal/brier_reward/group_std_mean": 0.1588895171880722, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18971530497074127, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01228010393679142, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030538421869277955, "signal/confidence_uniqueness_reward/group_std_mean": 0.050799714773893355, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.047202929854393005, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0030538421124219894, "signal/format_reward/centered_abs_mean": 0.02004123255610466, "signal/format_reward/group_std_mean": 0.03813575953245163, "signal/format_reward/group_zero_std_frac": 0.8416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.153868405520916, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01002061627805233, "signal/frontier_coverage_0/centered_abs_mean": 0.17273998260498047, "signal/frontier_coverage_0/group_std_mean": 0.22136751115322112, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038172660022974016, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002470181742683053, "signal/frontier_coverage_1/centered_abs_mean": 0.17273998260498047, "signal/frontier_coverage_1/group_std_mean": 0.22136751115322112, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038172660022974016, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002470181742683053, "signal/frontier_coverage_10/centered_abs_mean": 0.09057945162057876, "signal/frontier_coverage_10/group_std_mean": 0.11731237322092056, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019993556663393974, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012952861143276095, "signal/frontier_coverage_15/centered_abs_mean": 0.06736490577459335, "signal/frontier_coverage_15/group_std_mean": 0.0837602436542511, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015023627690970898, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009633181616663933, "signal/frontier_coverage_20/centered_abs_mean": 0.0906154453754425, "signal/frontier_coverage_20/group_std_mean": 0.11392967402935028, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020246949046850204, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012958008563145995, "signal/frontier_coverage_25/centered_abs_mean": 0.12485620528459548, "signal/frontier_coverage_25/group_std_mean": 0.1578374296426773, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027895611152052878, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001785443676635623, "signal/frontier_coverage_5/centered_abs_mean": 0.17239981293678283, "signal/frontier_coverage_5/group_std_mean": 0.2209311842918396, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038096596300601956, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002465317351743579, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30889744162559507, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3786299705505371, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4820574581623077, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03088974431157112, "step": 395 }, { "calibration/aurc": 0.1328948076275825, "calibration/batch_distribution_entropy": 0.9624684539519253, "calibration/buffer_distribution_entropy": 0.9846569152425998, "calibration/confidence_entropy": 0.49333697436117746, "calibration/coverage@0%": 0.12847558820296975, "calibration/coverage@1%": 0.14436599916187387, "calibration/coverage@10%": 0.5365831021921853, "calibration/coverage@15%": 0.6413618744077871, "calibration/coverage@20%": 0.6948443389641212, "calibration/coverage@25%": 0.8872661576030986, "calibration/coverage@30%": 0.9440488461282639, "calibration/coverage@5%": 0.4174874290234561, "calibration/ece": 0.171763445811933, "calibration/mean_confidence": 0.5922798731831165, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012065972222222231, "completions/max_length": 4021.0, "completions/max_terminated_length": 4021.0, "completions/mean_length": 1121.1360595703125, "completions/mean_terminated_length": 1134.825244140625, "completions/min_length": 0.0, "completions/min_terminated_length": 252.6, "epoch": 0.9599880001499981, "grad_norm": 0.001862908829934895, "learning_rate": 3.846153846153847e-06, "loss": -0.0404, "num_tokens": 1012627495.0, "reward": 0.9898022294044495, "reward_std": 0.12157986909151078, "rewards/accuracy_reward": 0.6822048664093018, "rewards/brier_reward": 0.8187066793441773, "rewards/confidence_uniqueness_reward": 0.9391976833343506, "rewards/format_reward": 0.9879340291023254, "rewards/frontier_coverage_0": 0.0362746462225914, "rewards/frontier_coverage_1": 0.0362746462225914, "rewards/frontier_coverage_10": 0.03540731780230999, "rewards/frontier_coverage_15": 0.07417062669992447, "rewards/frontier_coverage_20": 0.13375866413116455, "rewards/frontier_coverage_25": 0.20971741974353791, "rewards/frontier_coverage_5": 0.03616565503180027, "rewards/frontier_entropy_batch_reward": -0.29090956449508665, "signal/accuracy_reward/centered_abs_mean": 0.13099500834941863, "signal/accuracy_reward/group_std_mean": 0.17723969519138336, "signal/accuracy_reward/group_zero_std_frac": 0.4777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9267226219177246, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06549750417470931, "signal/advantage_abs_mean": 0.7497529029846192, "signal/advantage_pre_scale_abs_mean": 0.08834384828805923, "signal/advantage_pre_scale_std": 0.14922945201396942, "signal/advantage_std": 0.9830656886100769, "signal/brier_reward/centered_abs_mean": 0.1201691061258316, "signal/brier_reward/group_std_mean": 0.15796004235744476, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17042254209518432, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012016911059617996, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031691993772983554, "signal/confidence_uniqueness_reward/group_std_mean": 0.05179332569241524, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.045310333371162415, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003169199451804161, "signal/format_reward/centered_abs_mean": 0.020817056857049467, "signal/format_reward/group_std_mean": 0.03874273598194122, "signal/format_reward/group_zero_std_frac": 0.8444444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.14898683726787568, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010408528428524733, "signal/frontier_coverage_0/centered_abs_mean": 0.16637980341911315, "signal/frontier_coverage_0/group_std_mean": 0.21566648483276368, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03370913192629814, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023792311549186707, "signal/frontier_coverage_1/centered_abs_mean": 0.16637980341911315, "signal/frontier_coverage_1/group_std_mean": 0.21566648483276368, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03370913192629814, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023792311549186707, "signal/frontier_coverage_10/centered_abs_mean": 0.08630841374397277, "signal/frontier_coverage_10/group_std_mean": 0.11361690014600753, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01745337210595608, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001234210329130292, "signal/frontier_coverage_15/centered_abs_mean": 0.06675836741924286, "signal/frontier_coverage_15/group_std_mean": 0.08330333530902863, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013568529859185219, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009546446264721454, "signal/frontier_coverage_20/centered_abs_mean": 0.0917587623000145, "signal/frontier_coverage_20/group_std_mean": 0.11500014364719391, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018640580773353576, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013121502473950387, "signal/frontier_coverage_25/centered_abs_mean": 0.1281261071562767, "signal/frontier_coverage_25/group_std_mean": 0.16147857010364533, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02601141035556793, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018322034273296594, "signal/frontier_coverage_5/centered_abs_mean": 0.1659935176372528, "signal/frontier_coverage_5/group_std_mean": 0.21517403721809386, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033628907054662704, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002373707154765725, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32937549352645873, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39777472615242004, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4694278180599213, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03293755128979683, "step": 400 }, { "epoch": 0.9599880001499981, "eval_calibration/aurc": 0.10586284667182204, "eval_calibration/batch_distribution_entropy": 0.9507627214676093, "eval_calibration/buffer_distribution_entropy": 0.9840021499390136, "eval_calibration/confidence_entropy": 0.5006953442721606, "eval_calibration/coverage@0%": 0.379872311827957, "eval_calibration/coverage@1%": 0.379872311827957, "eval_calibration/coverage@10%": 0.5278897849462365, "eval_calibration/coverage@15%": 0.6759072580645161, "eval_calibration/coverage@20%": 0.899361559139785, "eval_calibration/coverage@25%": 0.967741935483871, "eval_calibration/coverage@30%": 0.9946236559139785, "eval_calibration/coverage@5%": 0.4275873655913978, "eval_calibration/ece": 0.2627307963709677, "eval_calibration/mean_confidence": 0.5663138272849463, "eval_completions/clipped_ratio": 0.01128472222222221, "eval_completions/max_length": 3346.8333333333335, "eval_completions/max_terminated_length": 3346.8333333333335, "eval_completions/mean_length": 1125.2504069010417, "eval_completions/mean_terminated_length": 1138.199727376302, "eval_completions/min_length": 70.0, "eval_completions/min_terminated_length": 323.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 1012627495.0, "eval_reward": 0.9165649116039276, "eval_reward_std": 0.23890877763430277, "eval_rewards/accuracy_reward": 0.6935763955116272, "eval_rewards/brier_reward": 0.8002510666847229, "eval_rewards/confidence_uniqueness_reward": 0.8845955729484558, "eval_rewards/format_reward": 0.9869791567325592, "eval_rewards/frontier_coverage_0": 0.014418061745042602, "eval_rewards/frontier_coverage_1": 0.014418061745042602, "eval_rewards/frontier_coverage_10": 0.030281184454603743, "eval_rewards/frontier_coverage_15": 0.06701069946090381, "eval_rewards/frontier_coverage_20": 0.12111099312702815, "eval_rewards/frontier_coverage_25": 0.19284088909626007, "eval_rewards/frontier_coverage_5": 0.01449225222071012, "eval_rewards/frontier_entropy_batch_reward": -0.9869791567325592, "eval_runtime": 214.8674, "eval_samples_per_second": 4.654, "eval_signal/accuracy_reward/centered_abs_mean": 0.4076063384612401, "eval_signal/accuracy_reward/group_std_mean": 0.456951508919398, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8707355658213297, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20380316923062006, "eval_signal/advantage_abs_mean": 0.8465096652507782, "eval_signal/advantage_pre_scale_abs_mean": 0.20322489738464355, "eval_signal/advantage_pre_scale_std": 0.23819045225779215, "eval_signal/advantage_std": 0.986402283112208, "eval_signal/brier_reward/centered_abs_mean": 0.1865755319595337, "eval_signal/brier_reward/group_std_mean": 0.2435737227400144, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07968846708536148, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018657553009688854, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05388018364707629, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.09592856466770172, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02302951893458764, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005388018364707629, "eval_signal/format_reward/centered_abs_mean": 0.025119357431928318, "eval_signal/format_reward/group_std_mean": 0.07066754686335723, "eval_signal/format_reward/group_zero_std_frac": 0.6111111243565878, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.053442043562730156, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.012559678715964159, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.29021725555260974, "eval_signal/frontier_coverage_0/group_std_mean": 0.3992450336615245, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01775054633617401, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0041501066492249565, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.29021725555260974, "eval_signal/frontier_coverage_1/group_std_mean": 0.3992450336615245, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01775054633617401, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0041501066492249565, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.14385747288664183, "eval_signal/frontier_coverage_10/group_std_mean": 0.20860673983891806, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.008807006757706404, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002057161880657077, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.08661755422751109, "eval_signal/frontier_coverage_15/group_std_mean": 0.11023381600777309, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005300398683175445, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012386311039639015, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.14894455671310425, "eval_signal/frontier_coverage_20/group_std_mean": 0.18669422467549643, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009113150803993145, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0021299070601041117, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.23310701549053192, "eval_signal/frontier_coverage_25/group_std_mean": 0.28737075130144757, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014257684350013733, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003333430349205931, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2895810604095459, "eval_signal/frontier_coverage_5/group_std_mean": 0.39846739172935486, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.017711769479016464, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004141009141070147, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.025119357431928318, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.07066754686335723, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.6111111243565878, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.010688409054030975, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.002511935851847132, "eval_steps_per_second": 0.028, "step": 400 }, { "epoch": 0.9599880001499981, "step": 400, "train_probe_calibration/aurc": 0.14781498801845364, "train_probe_calibration/batch_distribution_entropy": 0.9258537799413419, "train_probe_calibration/buffer_distribution_entropy": 0.9840014136744348, "train_probe_calibration/confidence_entropy": 0.5174560087088435, "train_probe_calibration/coverage@0%": 0.1831317204301075, "train_probe_calibration/coverage@1%": 0.1831317204301075, "train_probe_calibration/coverage@10%": 0.4227150537634408, "train_probe_calibration/coverage@15%": 0.709845430107527, "train_probe_calibration/coverage@20%": 0.8366935483870969, "train_probe_calibration/coverage@25%": 0.931619623655914, "train_probe_calibration/coverage@30%": 0.9842069892473119, "train_probe_calibration/coverage@5%": 0.1831317204301075, "train_probe_calibration/ece": 0.24389171706989246, "train_probe_calibration/mean_confidence": 0.5452770329301075, "train_probe_completions/clipped_ratio": 0.013715277777777776, "train_probe_completions/max_length": 3625.8333333333335, "train_probe_completions/max_terminated_length": 3625.8333333333335, "train_probe_completions/mean_length": 1138.3854370117188, "train_probe_completions/mean_terminated_length": 1154.0794677734375, "train_probe_completions/min_length": 42.333333333333336, "train_probe_completions/min_terminated_length": 249.33333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 1012627495.0, "train_probe_reward": 0.9373580018679301, "train_probe_reward_std": 0.22980502992868423, "train_probe_rewards/accuracy_reward": 0.7317708333333334, "train_probe_rewards/brier_reward": 0.81206147869428, "train_probe_rewards/confidence_uniqueness_reward": 0.8892526924610138, "train_probe_rewards/format_reward": 0.9887152711550394, "train_probe_rewards/frontier_coverage_0": -0.0023442222348724804, "train_probe_rewards/frontier_coverage_1": -0.0023442222348724804, "train_probe_rewards/frontier_coverage_10": 0.021256126773854096, "train_probe_rewards/frontier_coverage_15": 0.06604259957869847, "train_probe_rewards/frontier_coverage_20": 0.12492299949129422, "train_probe_rewards/frontier_coverage_25": 0.20412471145391464, "train_probe_rewards/frontier_coverage_5": -0.0022142972253883877, "train_probe_rewards/frontier_entropy_batch_reward": -0.9887152711550394, "train_probe_runtime": 208.4445, "train_probe_samples_per_second": 4.797, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3841688384612401, "train_probe_signal/accuracy_reward/group_std_mean": 0.44367093841234845, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8549265762170156, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.19208441923062006, "train_probe_signal/advantage_abs_mean": 0.8276252249876658, "train_probe_signal/advantage_pre_scale_abs_mean": 0.1909246121843656, "train_probe_signal/advantage_pre_scale_std": 0.22932683179775873, "train_probe_signal/advantage_std": 0.9863846600055695, "train_probe_signal/brier_reward/centered_abs_mean": 0.1636638417840004, "train_probe_signal/brier_reward/group_std_mean": 0.22015838821729025, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07289181649684906, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.016366383992135525, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.05082453042268753, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.08612562467654546, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02260653271029393, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0050824531354010105, "train_probe_signal/format_reward/centered_abs_mean": 0.021647135416666668, "train_probe_signal/format_reward/group_std_mean": 0.057857210437456764, "train_probe_signal/format_reward/group_zero_std_frac": 0.694444457689921, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.04728267093499502, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.010823567708333334, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2758088956276576, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.3786073128382365, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.017573293919364612, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003944066935218871, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2758088956276576, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.3786073128382365, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.017573293919364612, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003944066935218871, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.13124024122953415, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.19279029220342636, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.008360948336000243, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0018767355165133874, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.07852580770850182, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.10188833996653557, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00499945521975557, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011229190470961232, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.13784591356913248, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.17390990008910498, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.008773462225993475, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019711965966659286, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.21520801385243735, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.26701483378807706, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.013698053235809008, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003077474539168179, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2750549068053563, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.37767767409483594, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0175249179204305, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0039332850913827615, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.021647135416666668, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.057857210437456764, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.694444457689921, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.009456534404307604, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0021647136115158596, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.16543027853562334, "calibration/batch_distribution_entropy": 0.9582248127058456, "calibration/buffer_distribution_entropy": 0.9839146582790985, "calibration/confidence_entropy": 0.5125660195708808, "calibration/coverage@0%": 0.02267736859919655, "calibration/coverage@1%": 0.02267736859919655, "calibration/coverage@10%": 0.22190620223407792, "calibration/coverage@15%": 0.5628588181484097, "calibration/coverage@20%": 0.7633469933576739, "calibration/coverage@25%": 0.8386309124767225, "calibration/coverage@30%": 0.9379307262569831, "calibration/coverage@5%": 0.07492748067138952, "calibration/ece": 0.13882747421097189, "calibration/mean_confidence": 0.5993457340070647, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016579861111111094, "completions/max_length": 4040.6, "completions/max_terminated_length": 4040.6, "completions/mean_length": 1116.2814453125, "completions/mean_terminated_length": 1135.14482421875, "completions/min_length": 0.0, "completions/min_terminated_length": 217.0, "epoch": 0.9719878501518731, "grad_norm": 0.001691743964329362, "learning_rate": 3.81610576923077e-06, "loss": -0.0443, "num_tokens": 1028573649.0, "reward": 0.9947227001190185, "reward_std": 0.1256895914673805, "rewards/accuracy_reward": 0.7001736164093018, "rewards/brier_reward": 0.8061349272727967, "rewards/confidence_uniqueness_reward": 0.9350310444831849, "rewards/format_reward": 0.9833333253860473, "rewards/frontier_coverage_0": 0.01580127151682973, "rewards/frontier_coverage_1": 0.01580127151682973, "rewards/frontier_coverage_10": 0.029630134254693984, "rewards/frontier_coverage_15": 0.06885228753089905, "rewards/frontier_coverage_20": 0.12561193257570266, "rewards/frontier_coverage_25": 0.1998952865600586, "rewards/frontier_coverage_5": 0.01589932944625616, "rewards/frontier_entropy_batch_reward": -0.2788967788219452, "signal/accuracy_reward/centered_abs_mean": 0.1270507827401161, "signal/accuracy_reward/group_std_mean": 0.1727653205394745, "signal/accuracy_reward/group_zero_std_frac": 0.4944444477558136, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9054166555404664, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06352539137005805, "signal/advantage_abs_mean": 0.7439297795295715, "signal/advantage_pre_scale_abs_mean": 0.09049908965826034, "signal/advantage_pre_scale_std": 0.1570802301168442, "signal/advantage_std": 0.9830506086349488, "signal/brier_reward/centered_abs_mean": 0.12726181447505952, "signal/brier_reward/group_std_mean": 0.16593731343746185, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18182174265384674, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01272618155926466, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03679776974022388, "signal/confidence_uniqueness_reward/group_std_mean": 0.060738787055015564, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05204875022172928, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003679777169600129, "signal/format_reward/centered_abs_mean": 0.026974826864898205, "signal/format_reward/group_std_mean": 0.049242686852812766, "signal/format_reward/group_zero_std_frac": 0.8027777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18914935141801834, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013487413432449103, "signal/frontier_coverage_0/centered_abs_mean": 0.17137164175510405, "signal/frontier_coverage_0/group_std_mean": 0.2231689751148224, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03501456528902054, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002450614469125867, "signal/frontier_coverage_1/centered_abs_mean": 0.17137164175510405, "signal/frontier_coverage_1/group_std_mean": 0.2231689751148224, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03501456528902054, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002450614469125867, "signal/frontier_coverage_10/centered_abs_mean": 0.08988010734319687, "signal/frontier_coverage_10/group_std_mean": 0.11845540404319763, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018348486348986625, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012852855259552598, "signal/frontier_coverage_15/centered_abs_mean": 0.06513071209192275, "signal/frontier_coverage_15/group_std_mean": 0.08108891993761062, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013369975425302983, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009313691407442093, "signal/frontier_coverage_20/centered_abs_mean": 0.08782992511987686, "signal/frontier_coverage_20/group_std_mean": 0.10988791435956954, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01804915312677622, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001255967910401523, "signal/frontier_coverage_25/centered_abs_mean": 0.12174516469240189, "signal/frontier_coverage_25/group_std_mean": 0.15290275812149048, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02501319572329521, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017409559572115541, "signal/frontier_coverage_5/centered_abs_mean": 0.17103633284568787, "signal/frontier_coverage_5/group_std_mean": 0.22273699343204498, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03494622781872749, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024458195082843305, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32988558411598207, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39726953506469725, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47342961430549624, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03298855870962143, "step": 405 }, { "calibration/aurc": 0.16013978437888046, "calibration/batch_distribution_entropy": 0.9725485062078045, "calibration/buffer_distribution_entropy": 0.9847711501721669, "calibration/confidence_entropy": 0.4818931492234226, "calibration/coverage@0%": 0.00813677654975952, "calibration/coverage@1%": 0.06125330771507116, "calibration/coverage@10%": 0.39967957270893634, "calibration/coverage@15%": 0.5416899920662741, "calibration/coverage@20%": 0.6648686913581028, "calibration/coverage@25%": 0.8260833833251107, "calibration/coverage@30%": 0.9247456032379173, "calibration/coverage@5%": 0.14571245549803835, "calibration/ece": 0.17383732274943015, "calibration/mean_confidence": 0.5179158885787857, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028298611111111094, "completions/max_length": 4012.0, "completions/max_terminated_length": 4012.0, "completions/mean_length": 1138.8479248046874, "completions/mean_terminated_length": 1172.7252685546875, "completions/min_length": 0.0, "completions/min_terminated_length": 261.4, "epoch": 0.983987700153748, "grad_norm": 0.0016860616160556674, "learning_rate": 3.7860576923076927e-06, "loss": -0.066, "num_tokens": 1044789945.0, "reward": 0.9747801542282104, "reward_std": 0.1305119052529335, "rewards/accuracy_reward": 0.674913203716278, "rewards/brier_reward": 0.778337562084198, "rewards/confidence_uniqueness_reward": 0.9256033182144165, "rewards/format_reward": 0.9717013955116272, "rewards/frontier_coverage_0": 0.015211716108024121, "rewards/frontier_coverage_1": 0.015211716108024121, "rewards/frontier_coverage_10": 0.02842825446277857, "rewards/frontier_coverage_15": 0.06544613540172577, "rewards/frontier_coverage_20": 0.11701254844665528, "rewards/frontier_coverage_25": 0.18447456359863282, "rewards/frontier_coverage_5": 0.01528911516070366, "rewards/frontier_entropy_batch_reward": -0.25228601694107056, "signal/accuracy_reward/centered_abs_mean": 0.1281629756093025, "signal/accuracy_reward/group_std_mean": 0.17185668051242828, "signal/accuracy_reward/group_zero_std_frac": 0.5000000119209289, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9030983686447144, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06408148780465125, "signal/advantage_abs_mean": 0.741316843032837, "signal/advantage_pre_scale_abs_mean": 0.09381006360054016, "signal/advantage_pre_scale_std": 0.1637304425239563, "signal/advantage_std": 0.9830609083175659, "signal/brier_reward/centered_abs_mean": 0.14042544662952422, "signal/brier_reward/group_std_mean": 0.18072171807289122, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1998526006937027, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014042544737458229, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04530714936554432, "signal/confidence_uniqueness_reward/group_std_mean": 0.07276474684476852, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06441220045089721, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045307148713618515, "signal/format_reward/centered_abs_mean": 0.03671875, "signal/format_reward/group_std_mean": 0.06284484639763832, "signal/format_reward/group_zero_std_frac": 0.7638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.26043003499507905, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.018359375, "signal/frontier_coverage_0/centered_abs_mean": 0.18951506912708282, "signal/frontier_coverage_0/group_std_mean": 0.24266450405120848, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03843116760253906, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027100653387606144, "signal/frontier_coverage_1/centered_abs_mean": 0.18951506912708282, "signal/frontier_coverage_1/group_std_mean": 0.24266450405120848, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03843116760253906, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027100653387606144, "signal/frontier_coverage_10/centered_abs_mean": 0.10224405527114869, "signal/frontier_coverage_10/group_std_mean": 0.13257486820220948, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.02073887400329113, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001462090015411377, "signal/frontier_coverage_15/centered_abs_mean": 0.0658559963107109, "signal/frontier_coverage_15/group_std_mean": 0.08266130387783051, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013465725630521775, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009417407214641571, "signal/frontier_coverage_20/centered_abs_mean": 0.08396470397710801, "signal/frontier_coverage_20/group_std_mean": 0.1060999408364296, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017190796695649622, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012006952660158277, "signal/frontier_coverage_25/centered_abs_mean": 0.11455650329589843, "signal/frontier_coverage_25/group_std_mean": 0.14572837352752685, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02343129813671112, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001638158014975488, "signal/frontier_coverage_5/centered_abs_mean": 0.18928508162498475, "signal/frontier_coverage_5/group_std_mean": 0.24237094819545746, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038384463638067245, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002706776699051261, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31539603471755984, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3847527980804443, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4506865441799164, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03153960295021534, "step": 410 }, { "calibration/aurc": 0.14177884350230602, "calibration/batch_distribution_entropy": 0.9555859143793043, "calibration/buffer_distribution_entropy": 0.9847064709840442, "calibration/confidence_entropy": 0.4622752762053195, "calibration/coverage@0%": 0.05793784020545868, "calibration/coverage@1%": 0.1714656916643446, "calibration/coverage@10%": 0.40535473665354377, "calibration/coverage@15%": 0.5327791772841384, "calibration/coverage@20%": 0.6222954391422698, "calibration/coverage@25%": 0.8344170212765958, "calibration/coverage@30%": 0.9840425531914894, "calibration/coverage@5%": 0.3337851415956801, "calibration/ece": 0.19038391246975522, "calibration/mean_confidence": 0.5694251855815575, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025173611111111115, "completions/max_length": 4014.2, "completions/max_terminated_length": 4014.2, "completions/mean_length": 1131.0906982421875, "completions/mean_terminated_length": 1160.0925537109374, "completions/min_length": 0.0, "completions/min_terminated_length": 264.6, "epoch": 0.995987550155623, "grad_norm": 0.0018346694996580482, "learning_rate": 3.756009615384616e-06, "loss": -0.0671, "num_tokens": 1060962382.0, "reward": 0.9879943609237671, "reward_std": 0.12868785858154297, "rewards/accuracy_reward": 0.7119791626930236, "rewards/brier_reward": 0.8049194931983947, "rewards/confidence_uniqueness_reward": 0.9219497919082642, "rewards/format_reward": 0.9748263955116272, "rewards/frontier_coverage_0": 0.009872391540557145, "rewards/frontier_coverage_1": 0.009872391540557145, "rewards/frontier_coverage_10": 0.02418987303972244, "rewards/frontier_coverage_15": 0.08309966027736664, "rewards/frontier_coverage_20": 0.15105039477348328, "rewards/frontier_coverage_25": 0.23416467607021332, "rewards/frontier_coverage_5": 0.009905415773391723, "rewards/frontier_entropy_batch_reward": -0.35562185049057005, "signal/accuracy_reward/centered_abs_mean": 0.12139756828546525, "signal/accuracy_reward/group_std_mean": 0.16576847732067107, "signal/accuracy_reward/group_zero_std_frac": 0.5055555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.945546567440033, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06069878414273262, "signal/advantage_abs_mean": 0.7494885206222535, "signal/advantage_pre_scale_abs_mean": 0.09371411204338073, "signal/advantage_pre_scale_std": 0.16432504653930663, "signal/advantage_std": 0.9829301834106445, "signal/brier_reward/centered_abs_mean": 0.13123094588518142, "signal/brier_reward/group_std_mean": 0.16721619367599488, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20494545698165895, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013123095408082009, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04638012982904911, "signal/confidence_uniqueness_reward/group_std_mean": 0.06866296231746674, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07306440323591232, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004638013010844588, "signal/format_reward/centered_abs_mean": 0.03458116371184587, "signal/format_reward/group_std_mean": 0.05476707965135574, "signal/format_reward/group_zero_std_frac": 0.8083333492279052, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2730853110551834, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017290581855922936, "signal/frontier_coverage_0/centered_abs_mean": 0.15530972182750702, "signal/frontier_coverage_0/group_std_mean": 0.1991082549095154, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03483571857213974, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022209289949387313, "signal/frontier_coverage_1/centered_abs_mean": 0.15530972182750702, "signal/frontier_coverage_1/group_std_mean": 0.1991082549095154, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03483571857213974, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022209289949387313, "signal/frontier_coverage_10/centered_abs_mean": 0.08007914274930954, "signal/frontier_coverage_10/group_std_mean": 0.10351646095514297, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01798994392156601, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001145131781231612, "signal/frontier_coverage_15/centered_abs_mean": 0.07401133924722672, "signal/frontier_coverage_15/group_std_mean": 0.09164203703403473, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01649995595216751, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010583621682599188, "signal/frontier_coverage_20/centered_abs_mean": 0.1030519425868988, "signal/frontier_coverage_20/group_std_mean": 0.12880417853593826, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02288637273013592, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014736427692696452, "signal/frontier_coverage_25/centered_abs_mean": 0.1408381074666977, "signal/frontier_coverage_25/group_std_mean": 0.17753379344940184, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03124292306602001, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002013984927907586, "signal/frontier_coverage_5/centered_abs_mean": 0.15515292882919313, "signal/frontier_coverage_5/group_std_mean": 0.19891518354415894, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03480110689997673, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022186868358403445, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34138706922531126, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40525283217430114, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.532107800245285, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03413870632648468, "step": 415 }, { "calibration/aurc": 0.1294395860511989, "calibration/batch_distribution_entropy": 0.9733127958603607, "calibration/buffer_distribution_entropy": 0.9851089148524013, "calibration/confidence_entropy": 0.5005167237576094, "calibration/coverage@0%": 0.07778799529760441, "calibration/coverage@1%": 0.15989325845549915, "calibration/coverage@10%": 0.4693895026329297, "calibration/coverage@15%": 0.6701725069086626, "calibration/coverage@20%": 0.7768016760018321, "calibration/coverage@25%": 0.8552195307166761, "calibration/coverage@30%": 0.924125391823582, "calibration/coverage@5%": 0.3879996654336711, "calibration/ece": 0.1661169260612711, "calibration/mean_confidence": 0.5599344990323887, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018923611111111117, "completions/max_length": 3409.0, "completions/max_terminated_length": 3409.0, "completions/mean_length": 1049.7198974609375, "completions/mean_terminated_length": 1071.5993408203126, "completions/min_length": 118.2, "completions/min_terminated_length": 345.2, "epoch": 1.0095998800015, "grad_norm": 0.0021356670185923576, "learning_rate": 3.725961538461539e-06, "loss": -0.0564, "num_tokens": 1076905659.0, "reward": 0.9978170990943909, "reward_std": 0.13530487269163133, "rewards/accuracy_reward": 0.7213541507720947, "rewards/brier_reward": 0.7981032013893128, "rewards/confidence_uniqueness_reward": 0.9236820220947266, "rewards/format_reward": 0.9710069417953491, "rewards/frontier_coverage_0": -0.001099248230457306, "rewards/frontier_coverage_1": -0.001099248230457306, "rewards/frontier_coverage_10": 0.024827991053462027, "rewards/frontier_coverage_15": 0.07666746973991394, "rewards/frontier_coverage_20": 0.14054252803325654, "rewards/frontier_coverage_25": 0.22130533158779145, "rewards/frontier_coverage_5": -0.0010948097333312035, "rewards/frontier_entropy_batch_reward": -0.2712071597576141, "signal/accuracy_reward/centered_abs_mean": 0.14091796875, "signal/accuracy_reward/group_std_mean": 0.1860247492790222, "signal/accuracy_reward/group_zero_std_frac": 0.4777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9652868151664734, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.070458984375, "signal/advantage_abs_mean": 0.7590166449546814, "signal/advantage_pre_scale_abs_mean": 0.10070272982120514, "signal/advantage_pre_scale_std": 0.16899282336235047, "signal/advantage_std": 0.9830989837646484, "signal/brier_reward/centered_abs_mean": 0.13238780647516252, "signal/brier_reward/group_std_mean": 0.16885415315628052, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18241050839424133, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01323878075927496, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04513030052185059, "signal/confidence_uniqueness_reward/group_std_mean": 0.06864937618374825, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.062384354323148726, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004513029893860221, "signal/format_reward/centered_abs_mean": 0.03574218712747097, "signal/format_reward/group_std_mean": 0.057401788979768754, "signal/format_reward/group_zero_std_frac": 0.7972222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2461713194847107, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017871093563735486, "signal/frontier_coverage_0/centered_abs_mean": 0.18035004138946534, "signal/frontier_coverage_0/group_std_mean": 0.23116945028305053, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035538754612207415, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002579005528241396, "signal/frontier_coverage_1/centered_abs_mean": 0.18035004138946534, "signal/frontier_coverage_1/group_std_mean": 0.23116945028305053, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035538754612207415, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002579005528241396, "signal/frontier_coverage_10/centered_abs_mean": 0.08763528019189834, "signal/frontier_coverage_10/group_std_mean": 0.11366626918315888, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01727503500878811, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012531845597550273, "signal/frontier_coverage_15/centered_abs_mean": 0.06749407202005386, "signal/frontier_coverage_15/group_std_mean": 0.08479798883199692, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013384480029344559, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009651652304455638, "signal/frontier_coverage_20/centered_abs_mean": 0.09091575294733048, "signal/frontier_coverage_20/group_std_mean": 0.11489048898220063, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.018049764446914196, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013000952312722802, "signal/frontier_coverage_25/centered_abs_mean": 0.12579586505889892, "signal/frontier_coverage_25/group_std_mean": 0.15938990116119384, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.024969753250479697, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001798880915157497, "signal/frontier_coverage_5/centered_abs_mean": 0.18029914498329164, "signal/frontier_coverage_5/group_std_mean": 0.23110443353652954, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03552853986620903, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025782777462154626, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31849284172058107, "signal/frontier_entropy_batch_reward/group_std_mean": 0.389286732673645, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4434321105480194, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03184928484261036, "step": 420 }, { "calibration/aurc": 0.1664382462609737, "calibration/batch_distribution_entropy": 0.9444575270215367, "calibration/buffer_distribution_entropy": 0.9855649264562201, "calibration/confidence_entropy": 0.49403454100795496, "calibration/coverage@0%": 0.13590126976763067, "calibration/coverage@1%": 0.1441885625853102, "calibration/coverage@10%": 0.18859511087385578, "calibration/coverage@15%": 0.45050341103324143, "calibration/coverage@20%": 0.7215650787918753, "calibration/coverage@25%": 0.8493212377597885, "calibration/coverage@30%": 0.946567388963566, "calibration/coverage@5%": 0.16297309297205056, "calibration/ece": 0.13733663184872985, "calibration/mean_confidence": 0.6200478468294505, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01944444444444442, "completions/max_length": 3989.8, "completions/max_terminated_length": 3989.8, "completions/mean_length": 1118.1100830078126, "completions/mean_terminated_length": 1140.3765380859375, "completions/min_length": 0.0, "completions/min_terminated_length": 275.8, "epoch": 1.021599730003375, "grad_norm": 0.002060416853055358, "learning_rate": 3.695913461538462e-06, "loss": -0.0556, "num_tokens": 1092907503.0, "reward": 0.9950901508331299, "reward_std": 0.12893914580345153, "rewards/accuracy_reward": 0.7082465291023254, "rewards/brier_reward": 0.8094497084617615, "rewards/confidence_uniqueness_reward": 0.9306891322135925, "rewards/format_reward": 0.9805555701255798, "rewards/frontier_coverage_0": 0.01462572100572288, "rewards/frontier_coverage_1": 0.01462572100572288, "rewards/frontier_coverage_10": 0.03155530486255884, "rewards/frontier_coverage_15": 0.07923973947763444, "rewards/frontier_coverage_20": 0.14191269278526306, "rewards/frontier_coverage_25": 0.22000607550144197, "rewards/frontier_coverage_5": 0.014647024078294634, "rewards/frontier_entropy_batch_reward": -0.30712297558784485, "signal/accuracy_reward/centered_abs_mean": 0.12706705778837205, "signal/accuracy_reward/group_std_mean": 0.1701394349336624, "signal/accuracy_reward/group_zero_std_frac": 0.5083333313465118, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9200910568237305, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06353352889418602, "signal/advantage_abs_mean": 0.7581474661827088, "signal/advantage_pre_scale_abs_mean": 0.09590282887220383, "signal/advantage_pre_scale_std": 0.16368852853775023, "signal/advantage_std": 0.9830290079116821, "signal/brier_reward/centered_abs_mean": 0.12702373564243316, "signal/brier_reward/group_std_mean": 0.1641145259141922, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1852224737405777, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01270237360149622, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.041591137647628784, "signal/confidence_uniqueness_reward/group_std_mean": 0.06271186843514442, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06073887199163437, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041591140907257795, "signal/format_reward/centered_abs_mean": 0.03104383647441864, "signal/format_reward/group_std_mean": 0.04997814521193504, "signal/format_reward/group_zero_std_frac": 0.819444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.22688681483268738, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01552191823720932, "signal/frontier_coverage_0/centered_abs_mean": 0.15112363398075104, "signal/frontier_coverage_0/group_std_mean": 0.1970497488975525, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03148054778575897, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021610677940770984, "signal/frontier_coverage_1/centered_abs_mean": 0.15112363398075104, "signal/frontier_coverage_1/group_std_mean": 0.1970497488975525, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03148054778575897, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021610677940770984, "signal/frontier_coverage_10/centered_abs_mean": 0.06746098995208741, "signal/frontier_coverage_10/group_std_mean": 0.08946224302053452, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014050611481070518, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009646921185776592, "signal/frontier_coverage_15/centered_abs_mean": 0.07157327681779861, "signal/frontier_coverage_15/group_std_mean": 0.08971850723028182, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014921391755342484, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010234977817162871, "signal/frontier_coverage_20/centered_abs_mean": 0.10281662493944169, "signal/frontier_coverage_20/group_std_mean": 0.12900976240634918, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021420946344733238, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014702777145430446, "signal/frontier_coverage_25/centered_abs_mean": 0.1416828900575638, "signal/frontier_coverage_25/group_std_mean": 0.17844592332839965, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02950539030134678, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002026065322570503, "signal/frontier_coverage_5/centered_abs_mean": 0.1510587751865387, "signal/frontier_coverage_5/group_std_mean": 0.19697055518627166, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03146698512136936, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021601404063403607, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33122584223747253, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3963626027107239, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48287222981452943, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03312258496880531, "step": 425 }, { "calibration/aurc": 0.08296184763617191, "calibration/batch_distribution_entropy": 0.9267126629409962, "calibration/buffer_distribution_entropy": 0.9845992025019553, "calibration/confidence_entropy": 0.47815104058824354, "calibration/coverage@0%": 0.1433396054307729, "calibration/coverage@1%": 0.2590142086053761, "calibration/coverage@10%": 0.7602710057627717, "calibration/coverage@15%": 0.8728334245575626, "calibration/coverage@20%": 0.9432129173508483, "calibration/coverage@25%": 0.9770114942528736, "calibration/coverage@30%": 0.9994252873563217, "calibration/coverage@5%": 0.4102373856000341, "calibration/ece": 0.21019582274960474, "calibration/mean_confidence": 0.6381662601107146, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015017361111111117, "completions/max_length": 3976.8, "completions/max_terminated_length": 3976.8, "completions/mean_length": 1031.4753540039062, "completions/mean_terminated_length": 1046.8734375, "completions/min_length": 0.0, "completions/min_terminated_length": 241.6, "epoch": 1.03359958000525, "grad_norm": 0.002130492590367794, "learning_rate": 3.665865384615385e-06, "loss": -0.0395, "num_tokens": 1107866579.0, "reward": 1.0310980796813964, "reward_std": 0.11252327859401703, "rewards/accuracy_reward": 0.7815972208976746, "rewards/brier_reward": 0.8353811860084533, "rewards/confidence_uniqueness_reward": 0.930880856513977, "rewards/format_reward": 0.9849826335906983, "rewards/frontier_coverage_0": -0.011007923632860184, "rewards/frontier_coverage_1": -0.011007923632860184, "rewards/frontier_coverage_10": 0.02423018105328083, "rewards/frontier_coverage_15": 0.10173185169696808, "rewards/frontier_coverage_20": 0.18633103370666504, "rewards/frontier_coverage_25": 0.2875380277633667, "rewards/frontier_coverage_5": -0.010987864434719085, "rewards/frontier_entropy_batch_reward": -0.3692371368408203, "signal/accuracy_reward/centered_abs_mean": 0.10270182341337204, "signal/accuracy_reward/group_std_mean": 0.142086860537529, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8557999968528748, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05135091170668602, "signal/advantage_abs_mean": 0.7485065340995789, "signal/advantage_pre_scale_abs_mean": 0.08068742454051972, "signal/advantage_pre_scale_std": 0.14736481308937072, "signal/advantage_std": 0.9828257083892822, "signal/brier_reward/centered_abs_mean": 0.1104082152247429, "signal/brier_reward/group_std_mean": 0.14500466585159302, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1834684669971466, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011040821857750415, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.034445105493068694, "signal/confidence_uniqueness_reward/group_std_mean": 0.05594793781638145, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05692398175597191, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003444510605186224, "signal/format_reward/centered_abs_mean": 0.022271049953997136, "signal/format_reward/group_std_mean": 0.04136303998529911, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18274498283863067, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011135524976998568, "signal/frontier_coverage_0/centered_abs_mean": 0.13331829905509948, "signal/frontier_coverage_0/group_std_mean": 0.17565890848636628, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031815215945243835, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019064516527578235, "signal/frontier_coverage_1/centered_abs_mean": 0.13331829905509948, "signal/frontier_coverage_1/group_std_mean": 0.17565890848636628, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031815215945243835, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019064516527578235, "signal/frontier_coverage_10/centered_abs_mean": 0.06181113198399544, "signal/frontier_coverage_10/group_std_mean": 0.08169252574443817, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01474505104124546, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008838991867378354, "signal/frontier_coverage_15/centered_abs_mean": 0.07603696435689926, "signal/frontier_coverage_15/group_std_mean": 0.09389333873987198, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018103727698326112, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010873286053538322, "signal/frontier_coverage_20/centered_abs_mean": 0.10807138234376908, "signal/frontier_coverage_20/group_std_mean": 0.13423685133457183, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025709601119160652, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015454208012670278, "signal/frontier_coverage_25/centered_abs_mean": 0.14581511318683624, "signal/frontier_coverage_25/group_std_mean": 0.18234173357486724, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.034686730802059175, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002085156296379864, "signal/frontier_coverage_5/centered_abs_mean": 0.1332632303237915, "signal/frontier_coverage_5/group_std_mean": 0.17558786869049073, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03180203214287758, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001905664219520986, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33877058029174806, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4033771097660065, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5624748587608337, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03387705832719803, "step": 430 }, { "calibration/aurc": 0.17993360872951467, "calibration/batch_distribution_entropy": 0.956776656487045, "calibration/buffer_distribution_entropy": 0.9847372028422502, "calibration/confidence_entropy": 0.48640155317854983, "calibration/coverage@0%": 0.00976978220315286, "calibration/coverage@1%": 0.00976978220315286, "calibration/coverage@10%": 0.47534625919883966, "calibration/coverage@15%": 0.5970449645029289, "calibration/coverage@20%": 0.6671838796988296, "calibration/coverage@25%": 0.7335380912216357, "calibration/coverage@30%": 0.8138264319625941, "calibration/coverage@5%": 0.18928673568716697, "calibration/ece": 0.18617977830276256, "calibration/mean_confidence": 0.5551562162905087, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030989583333333348, "completions/max_length": 4045.4, "completions/max_terminated_length": 4045.4, "completions/mean_length": 1006.7073974609375, "completions/mean_terminated_length": 1038.9220458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 263.2, "epoch": 1.045599430007125, "grad_norm": 0.002083980944007635, "learning_rate": 3.635817307692308e-06, "loss": -0.0897, "num_tokens": 1122548360.0, "reward": 0.9809871554374695, "reward_std": 0.14432497471570968, "rewards/accuracy_reward": 0.6943576335906982, "rewards/brier_reward": 0.7956489801406861, "rewards/confidence_uniqueness_reward": 0.9197240710258484, "rewards/format_reward": 0.9684895873069763, "rewards/frontier_coverage_0": 0.018659752607345582, "rewards/frontier_coverage_1": 0.018659752607345582, "rewards/frontier_coverage_10": 0.03520567715167999, "rewards/frontier_coverage_15": 0.08243112862110138, "rewards/frontier_coverage_20": 0.1451416015625, "rewards/frontier_coverage_25": 0.22207084000110627, "rewards/frontier_coverage_5": 0.018691231869161128, "rewards/frontier_entropy_batch_reward": -0.29708088040351865, "signal/accuracy_reward/centered_abs_mean": 0.12945420891046525, "signal/accuracy_reward/group_std_mean": 0.17190252244472504, "signal/accuracy_reward/group_zero_std_frac": 0.5055555641651154, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.879289448261261, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06472710445523262, "signal/advantage_abs_mean": 0.7449347257614136, "signal/advantage_pre_scale_abs_mean": 0.10397075414657593, "signal/advantage_pre_scale_std": 0.18122569024562835, "signal/advantage_std": 0.9831098794937134, "signal/brier_reward/centered_abs_mean": 0.13766922950744628, "signal/brier_reward/group_std_mean": 0.17730204164981841, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18823137879371643, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013766923174262046, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05718742534518242, "signal/confidence_uniqueness_reward/group_std_mean": 0.09346490800380707, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07706695050001144, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005718742869794369, "signal/format_reward/centered_abs_mean": 0.04926757887005806, "signal/format_reward/group_std_mean": 0.08465958237648011, "signal/format_reward/group_zero_std_frac": 0.6777777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.32885663509368895, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02463378943502903, "signal/frontier_coverage_0/centered_abs_mean": 0.17320359647274017, "signal/frontier_coverage_0/group_std_mean": 0.22111334800720214, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033909741789102554, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024768114555627106, "signal/frontier_coverage_1/centered_abs_mean": 0.17320359647274017, "signal/frontier_coverage_1/group_std_mean": 0.22111334800720214, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033909741789102554, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024768114555627106, "signal/frontier_coverage_10/centered_abs_mean": 0.0769290342926979, "signal/frontier_coverage_10/group_std_mean": 0.09959446638822556, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015100923553109168, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011000852100551128, "signal/frontier_coverage_15/centered_abs_mean": 0.07145349681377411, "signal/frontier_coverage_15/group_std_mean": 0.08837363570928573, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014147062785923481, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010217850678600371, "signal/frontier_coverage_20/centered_abs_mean": 0.09702417999505997, "signal/frontier_coverage_20/group_std_mean": 0.12051473706960678, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019190432131290437, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013874457916244864, "signal/frontier_coverage_25/centered_abs_mean": 0.13127293437719345, "signal/frontier_coverage_25/group_std_mean": 0.16414665877819062, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.025929966941475868, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018772028852254152, "signal/frontier_coverage_5/centered_abs_mean": 0.17312270998954774, "signal/frontier_coverage_5/group_std_mean": 0.22101564705371857, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03389389365911484, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024756547063589096, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33052846789360046, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3995340406894684, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45733819007873533, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03305284790694714, "step": 435 }, { "calibration/aurc": 0.14388746882850229, "calibration/batch_distribution_entropy": 0.9284179641062792, "calibration/buffer_distribution_entropy": 0.9853219225343312, "calibration/confidence_entropy": 0.49713957100109135, "calibration/coverage@0%": 0.051848325523578695, "calibration/coverage@1%": 0.051848325523578695, "calibration/coverage@10%": 0.5079058909249714, "calibration/coverage@15%": 0.7396266713421134, "calibration/coverage@20%": 0.816502049642694, "calibration/coverage@25%": 0.8440807137496872, "calibration/coverage@30%": 0.866268656716418, "calibration/coverage@5%": 0.30370166027493467, "calibration/ece": 0.1480970565108381, "calibration/mean_confidence": 0.5872938701596123, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04756944444444446, "completions/max_length": 4001.4, "completions/max_terminated_length": 4001.4, "completions/mean_length": 987.0814208984375, "completions/mean_terminated_length": 1037.7956420898438, "completions/min_length": 0.0, "completions/min_terminated_length": 248.2, "epoch": 1.057599280009, "grad_norm": 0.0020778202451765537, "learning_rate": 3.605769230769231e-06, "loss": -0.1183, "num_tokens": 1137019666.0, "reward": 0.9766067028045654, "reward_std": 0.17658871114254, "rewards/accuracy_reward": 0.7150173664093018, "rewards/brier_reward": 0.7861824035644531, "rewards/confidence_uniqueness_reward": 0.8997833251953125, "rewards/format_reward": 0.9480902791023255, "rewards/frontier_coverage_0": -0.008721314929425716, "rewards/frontier_coverage_1": -0.008721314929425716, "rewards/frontier_coverage_10": 0.024878227338194846, "rewards/frontier_coverage_15": 0.085553839802742, "rewards/frontier_coverage_20": 0.1561792552471161, "rewards/frontier_coverage_25": 0.24047624468803405, "rewards/frontier_coverage_5": -0.008641589153558015, "rewards/frontier_entropy_batch_reward": -0.30422061681747437, "signal/accuracy_reward/centered_abs_mean": 0.13926323801279067, "signal/accuracy_reward/group_std_mean": 0.19407737255096436, "signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7671608686447143, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06963161900639533, "signal/advantage_abs_mean": 0.7121507525444031, "signal/advantage_pre_scale_abs_mean": 0.1229197159409523, "signal/advantage_pre_scale_std": 0.21047253012657166, "signal/advantage_std": 0.9833353638648987, "signal/brier_reward/centered_abs_mean": 0.14424349814653398, "signal/brier_reward/group_std_mean": 0.19108721613883972, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.15954833924770356, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014424350298941135, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08067524954676628, "signal/confidence_uniqueness_reward/group_std_mean": 0.12732555568218232, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08215240314602852, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008067525085061788, "signal/format_reward/centered_abs_mean": 0.07577040046453476, "signal/format_reward/group_std_mean": 0.12256582081317902, "signal/format_reward/group_zero_std_frac": 0.5611111134290695, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.3762800365686417, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.03788520023226738, "signal/frontier_coverage_0/centered_abs_mean": 0.15879909992218016, "signal/frontier_coverage_0/group_std_mean": 0.20415432155132293, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.025722567364573477, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022708271164447067, "signal/frontier_coverage_1/centered_abs_mean": 0.15879909992218016, "signal/frontier_coverage_1/group_std_mean": 0.20415432155132293, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.025722567364573477, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022708271164447067, "signal/frontier_coverage_10/centered_abs_mean": 0.06879703104496002, "signal/frontier_coverage_10/group_std_mean": 0.08966280072927475, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011097630951553583, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009837975027039646, "signal/frontier_coverage_15/centered_abs_mean": 0.07169133126735687, "signal/frontier_coverage_15/group_std_mean": 0.08976521641016007, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011832451168447732, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010251860483549535, "signal/frontier_coverage_20/centered_abs_mean": 0.1028002068400383, "signal/frontier_coverage_20/group_std_mean": 0.1287078246474266, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01704120673239231, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001470042997971177, "signal/frontier_coverage_25/centered_abs_mean": 0.14137257635593414, "signal/frontier_coverage_25/group_std_mean": 0.17798839807510375, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023427498526871204, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002021627686917782, "signal/frontier_coverage_5/centered_abs_mean": 0.15861307382583617, "signal/frontier_coverage_5/group_std_mean": 0.20392609238624573, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.025691739097237588, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022681670263409613, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33480705618858336, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3987271010875702, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3848613739013672, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03348070606589317, "step": 440 }, { "calibration/aurc": 0.10326447524720148, "calibration/batch_distribution_entropy": 0.9536010778222744, "calibration/buffer_distribution_entropy": 0.9841287707055834, "calibration/confidence_entropy": 0.4816929863011735, "calibration/coverage@0%": 0.05213600366138711, "calibration/coverage@1%": 0.08546933699472044, "calibration/coverage@10%": 0.5888607070042604, "calibration/coverage@15%": 0.7645768229948711, "calibration/coverage@20%": 0.8811925409547783, "calibration/coverage@25%": 0.9414970339001931, "calibration/coverage@30%": 0.9771117166212534, "calibration/coverage@5%": 0.3900736698300794, "calibration/ece": 0.14976913086003144, "calibration/mean_confidence": 0.6172107058464944, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01597222222222223, "completions/max_length": 3980.4, "completions/max_terminated_length": 3980.4, "completions/mean_length": 953.7552978515625, "completions/mean_terminated_length": 969.312060546875, "completions/min_length": 0.0, "completions/min_terminated_length": 243.4, "epoch": 1.069599130010875, "grad_norm": 0.0019089620327576995, "learning_rate": 3.575721153846154e-06, "loss": -0.0578, "num_tokens": 1151063951.0, "reward": 1.0075167655944823, "reward_std": 0.125567664206028, "rewards/accuracy_reward": 0.7283854126930237, "rewards/brier_reward": 0.8320141434669495, "rewards/confidence_uniqueness_reward": 0.9312868356704712, "rewards/format_reward": 0.9836805582046508, "rewards/frontier_coverage_0": 0.01591839836910367, "rewards/frontier_coverage_1": 0.01591839836910367, "rewards/frontier_coverage_10": 0.03556139282882213, "rewards/frontier_coverage_15": 0.09857904762029648, "rewards/frontier_coverage_20": 0.17490629553794862, "rewards/frontier_coverage_25": 0.2656311184167862, "rewards/frontier_coverage_5": 0.015986279817298055, "rewards/frontier_entropy_batch_reward": -0.3374809443950653, "signal/accuracy_reward/centered_abs_mean": 0.11180012971162796, "signal/accuracy_reward/group_std_mean": 0.15532722175121308, "signal/accuracy_reward/group_zero_std_frac": 0.5277777791023255, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8297061920166016, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05590006485581398, "signal/advantage_abs_mean": 0.7285216093063355, "signal/advantage_pre_scale_abs_mean": 0.08672792613506317, "signal/advantage_pre_scale_std": 0.1563153862953186, "signal/advantage_std": 0.9829874873161316, "signal/brier_reward/centered_abs_mean": 0.11765489429235458, "signal/brier_reward/group_std_mean": 0.1569055736064911, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17583496868610382, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011765489727258683, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0403615452349186, "signal/confidence_uniqueness_reward/group_std_mean": 0.07109490633010865, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.060684775561094285, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0040361546911299225, "signal/format_reward/centered_abs_mean": 0.02916666679084301, "signal/format_reward/group_std_mean": 0.05808819979429245, "signal/format_reward/group_zero_std_frac": 0.7527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21720606386661528, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.014583333395421505, "signal/frontier_coverage_0/centered_abs_mean": 0.14205318093299865, "signal/frontier_coverage_0/group_std_mean": 0.18557943999767304, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030138077586889266, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002031360566616058, "signal/frontier_coverage_1/centered_abs_mean": 0.14205318093299865, "signal/frontier_coverage_1/group_std_mean": 0.18557943999767304, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030138077586889266, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002031360566616058, "signal/frontier_coverage_10/centered_abs_mean": 0.06313612163066865, "signal/frontier_coverage_10/group_std_mean": 0.08182553499937058, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013453066535294055, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009028465370647609, "signal/frontier_coverage_15/centered_abs_mean": 0.07494814544916154, "signal/frontier_coverage_15/group_std_mean": 0.09314066916704178, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01615871414542198, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010717584751546382, "signal/frontier_coverage_20/centered_abs_mean": 0.10805933326482772, "signal/frontier_coverage_20/group_std_mean": 0.13530959486961364, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023342077061533927, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015452483668923378, "signal/frontier_coverage_25/centered_abs_mean": 0.14793600142002106, "signal/frontier_coverage_25/group_std_mean": 0.18606266975402833, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0319239042699337, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002115484792739153, "signal/frontier_coverage_5/centered_abs_mean": 0.14187667965888978, "signal/frontier_coverage_5/group_std_mean": 0.18535732924938203, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030100544169545173, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002028836542740464, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33231388330459594, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40017271041870117, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5018444716930389, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03323138877749443, "step": 445 }, { "calibration/aurc": 0.08478005706856111, "calibration/batch_distribution_entropy": 0.9683973579606967, "calibration/buffer_distribution_entropy": 0.9837530232424537, "calibration/confidence_entropy": 0.48627515320497283, "calibration/coverage@0%": 0.24325156703800324, "calibration/coverage@1%": 0.2568285905366977, "calibration/coverage@10%": 0.5993765100063526, "calibration/coverage@15%": 0.8358934737999405, "calibration/coverage@20%": 0.9019483275751551, "calibration/coverage@25%": 0.9727829060531924, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.40373817347591984, "calibration/ece": 0.21289906688097188, "calibration/mean_confidence": 0.5514703829801496, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015885416666666673, "completions/max_length": 3790.4, "completions/max_terminated_length": 3790.4, "completions/mean_length": 965.8823120117188, "completions/mean_terminated_length": 981.3806762695312, "completions/min_length": 0.0, "completions/min_terminated_length": 222.6, "epoch": 1.08159898001275, "grad_norm": 0.0022360687144100666, "learning_rate": 3.5456730769230774e-06, "loss": -0.0372, "num_tokens": 1165294179.0, "reward": 0.9963475942611695, "reward_std": 0.12414093762636184, "rewards/accuracy_reward": 0.6988715410232544, "rewards/brier_reward": 0.810006046295166, "rewards/confidence_uniqueness_reward": 0.9354828357696533, "rewards/format_reward": 0.98359375, "rewards/frontier_coverage_0": 0.028983466140925885, "rewards/frontier_coverage_1": 0.028983466140925885, "rewards/frontier_coverage_10": 0.03974376879632473, "rewards/frontier_coverage_15": 0.08501660823822021, "rewards/frontier_coverage_20": 0.1462234228849411, "rewards/frontier_coverage_25": 0.22431569099426268, "rewards/frontier_coverage_5": 0.028991557843983173, "rewards/frontier_entropy_batch_reward": -0.27760238349437716, "signal/accuracy_reward/centered_abs_mean": 0.1218912735581398, "signal/accuracy_reward/group_std_mean": 0.16463150084018707, "signal/accuracy_reward/group_zero_std_frac": 0.5083333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8862983822822571, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0609456367790699, "signal/advantage_abs_mean": 0.7339087724685669, "signal/advantage_pre_scale_abs_mean": 0.08842306286096573, "signal/advantage_pre_scale_std": 0.15372338891029358, "signal/advantage_std": 0.9830293416976928, "signal/brier_reward/centered_abs_mean": 0.13105346411466598, "signal/brier_reward/group_std_mean": 0.17164418697357178, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19119353890419005, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013105347007513046, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03763534687459469, "signal/confidence_uniqueness_reward/group_std_mean": 0.06680730283260346, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.055056449770927426, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0037635347805917265, "signal/format_reward/centered_abs_mean": 0.02844509556889534, "signal/format_reward/group_std_mean": 0.056251946836709976, "signal/format_reward/group_zero_std_frac": 0.7583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.20870205760002136, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01422254778444767, "signal/frontier_coverage_0/centered_abs_mean": 0.18482233881950377, "signal/frontier_coverage_0/group_std_mean": 0.23658272325992585, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038531605154275894, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002642959402874112, "signal/frontier_coverage_1/centered_abs_mean": 0.18482233881950377, "signal/frontier_coverage_1/group_std_mean": 0.23658272325992585, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038531605154275894, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002642959402874112, "signal/frontier_coverage_10/centered_abs_mean": 0.07772842198610305, "signal/frontier_coverage_10/group_std_mean": 0.1001784086227417, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016198632307350636, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001111516449600458, "signal/frontier_coverage_15/centered_abs_mean": 0.06999329477548599, "signal/frontier_coverage_15/group_std_mean": 0.0872062012553215, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014550425298511981, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010009041405282914, "signal/frontier_coverage_20/centered_abs_mean": 0.09316149204969407, "signal/frontier_coverage_20/group_std_mean": 0.11618510782718658, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.019368303567171098, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001332209282554686, "signal/frontier_coverage_25/centered_abs_mean": 0.1268085092306137, "signal/frontier_coverage_25/group_std_mean": 0.15882777273654938, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.026379085332155227, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001813361677341163, "signal/frontier_coverage_5/centered_abs_mean": 0.18448520302772523, "signal/frontier_coverage_5/group_std_mean": 0.23615312576293945, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03846092000603676, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026381383650004864, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3260856032371521, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3959659218788147, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47359164953231814, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03260856196284294, "step": 450 }, { "epoch": 1.08159898001275, "eval_calibration/aurc": 0.13309575401983562, "eval_calibration/batch_distribution_entropy": 0.936133368392568, "eval_calibration/buffer_distribution_entropy": 0.9838314799658758, "eval_calibration/confidence_entropy": 0.4708227347238358, "eval_calibration/coverage@0%": 0.26797715053763443, "eval_calibration/coverage@1%": 0.26797715053763443, "eval_calibration/coverage@10%": 0.5540994623655914, "eval_calibration/coverage@15%": 0.6597782258064516, "eval_calibration/coverage@20%": 0.7594086021505376, "eval_calibration/coverage@25%": 0.9164986559139785, "eval_calibration/coverage@30%": 0.9375, "eval_calibration/coverage@5%": 0.3323252688172043, "eval_calibration/ece": 0.21734695889336916, "eval_calibration/mean_confidence": 0.5601244476926523, "eval_completions/clipped_ratio": 0.005208333333333333, "eval_completions/max_length": 3039.8333333333335, "eval_completions/max_terminated_length": 3039.8333333333335, "eval_completions/mean_length": 954.2652994791666, "eval_completions/mean_terminated_length": 959.2304890950521, "eval_completions/min_length": 99.5, "eval_completions/min_terminated_length": 270.5, "eval_loss": 0.0, "eval_num_tokens": 1165294179.0, "eval_reward": 0.9161022206147512, "eval_reward_std": 0.2383043939868609, "eval_rewards/accuracy_reward": 0.6814236044883728, "eval_rewards/brier_reward": 0.8101389706134796, "eval_rewards/confidence_uniqueness_reward": 0.8908315300941467, "eval_rewards/format_reward": 0.9921875, "eval_rewards/frontier_coverage_0": 0.03702201593356828, "eval_rewards/frontier_coverage_1": 0.03702201593356828, "eval_rewards/frontier_coverage_10": 0.04230095911771059, "eval_rewards/frontier_coverage_15": 0.08261789381504059, "eval_rewards/frontier_coverage_20": 0.14037525778015456, "eval_rewards/frontier_coverage_25": 0.21233193079630533, "eval_rewards/frontier_coverage_5": 0.037026698933914304, "eval_rewards/frontier_entropy_batch_reward": -0.9921875, "eval_runtime": 204.9493, "eval_samples_per_second": 4.879, "eval_signal/accuracy_reward/centered_abs_mean": 0.4199761301279068, "eval_signal/accuracy_reward/group_std_mean": 0.4641881287097931, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8903038104375204, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2099880650639534, "eval_signal/advantage_abs_mean": 0.869709312915802, "eval_signal/advantage_pre_scale_abs_mean": 0.20785571883122125, "eval_signal/advantage_pre_scale_std": 0.23637428879737854, "eval_signal/advantage_std": 0.9864057501157125, "eval_signal/brier_reward/centered_abs_mean": 0.18304560085137686, "eval_signal/brier_reward/group_std_mean": 0.23924180368582407, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07741126045584679, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01830456079915166, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.047562570621569954, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07460235804319382, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020098049348841112, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004756257132006188, "eval_signal/format_reward/centered_abs_mean": 0.015136718439559141, "eval_signal/format_reward/group_std_mean": 0.044194173688689865, "eval_signal/format_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.03172660774240891, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007568359219779571, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2983556042114894, "eval_signal/frontier_coverage_0/group_std_mean": 0.4108336369196574, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.018101781296233337, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004266485145005087, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2983556042114894, "eval_signal/frontier_coverage_1/group_std_mean": 0.4108336369196574, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.018101781296233337, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004266485145005087, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.10858400911092758, "eval_signal/frontier_coverage_10/group_std_mean": 0.1563408076763153, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006582974921911955, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0015527513654281695, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.09981824830174446, "eval_signal/frontier_coverage_15/group_std_mean": 0.12792696679631868, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0060469558617721, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014274009154178202, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.1740375558535258, "eval_signal/frontier_coverage_20/group_std_mean": 0.21623691419760385, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.010545457247644663, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0024887369945645332, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2637837479511897, "eval_signal/frontier_coverage_25/group_std_mean": 0.3216549704472224, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015982618710647028, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003772107457431654, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2975670297940572, "eval_signal/frontier_coverage_5/group_std_mean": 0.4098781496286392, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01805388368666172, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0042552082644154625, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.015136718439559141, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.044194173688689865, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.750000019868215, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.006345321889966726, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0015136719642517467, "eval_steps_per_second": 0.029, "step": 450 }, { "epoch": 1.08159898001275, "step": 450, "train_probe_calibration/aurc": 0.09654250782712287, "train_probe_calibration/batch_distribution_entropy": 0.9183525716825388, "train_probe_calibration/buffer_distribution_entropy": 0.9837598538858333, "train_probe_calibration/confidence_entropy": 0.4956307539442461, "train_probe_calibration/coverage@0%": 0.3541666666666667, "train_probe_calibration/coverage@1%": 0.3541666666666667, "train_probe_calibration/coverage@10%": 0.671875, "train_probe_calibration/coverage@15%": 0.7864583333333334, "train_probe_calibration/coverage@20%": 0.8541666666666666, "train_probe_calibration/coverage@25%": 0.9114583333333334, "train_probe_calibration/coverage@30%": 0.9583333333333334, "train_probe_calibration/coverage@5%": 0.3958333333333333, "train_probe_calibration/ece": 0.23860572916666667, "train_probe_calibration/mean_confidence": 0.5797567708333333, "train_probe_completions/clipped_ratio": 0.004340277777777772, "train_probe_completions/max_length": 2840.3333333333335, "train_probe_completions/max_terminated_length": 2840.3333333333335, "train_probe_completions/mean_length": 917.0496826171875, "train_probe_completions/mean_terminated_length": 921.0797424316406, "train_probe_completions/min_length": 113.16666666666667, "train_probe_completions/min_terminated_length": 202.16666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 1165294179.0, "train_probe_reward": 0.9429791967074076, "train_probe_reward_std": 0.22729473561048508, "train_probe_rewards/accuracy_reward": 0.7274305621782938, "train_probe_rewards/brier_reward": 0.8345652719338735, "train_probe_rewards/confidence_uniqueness_reward": 0.8907654881477356, "train_probe_rewards/format_reward": 0.9930555522441864, "train_probe_rewards/frontier_coverage_0": 0.02799505041912198, "train_probe_rewards/frontier_coverage_1": 0.02799505041912198, "train_probe_rewards/frontier_coverage_10": 0.04346960255255302, "train_probe_rewards/frontier_coverage_15": 0.10106463233629863, "train_probe_rewards/frontier_coverage_20": 0.17408683399359384, "train_probe_rewards/frontier_coverage_25": 0.2622348219156265, "train_probe_rewards/frontier_coverage_5": 0.02808955203120907, "train_probe_rewards/frontier_entropy_batch_reward": -0.9930555522441864, "train_probe_runtime": 190.26, "train_probe_samples_per_second": 5.256, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3901909738779068, "train_probe_signal/accuracy_reward/group_std_mean": 0.44776545961697894, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8720936874548594, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1950954869389534, "train_probe_signal/advantage_abs_mean": 0.8436907827854156, "train_probe_signal/advantage_pre_scale_abs_mean": 0.1921898971001307, "train_probe_signal/advantage_pre_scale_std": 0.22606053948402405, "train_probe_signal/advantage_std": 0.9863827129205068, "train_probe_signal/brier_reward/centered_abs_mean": 0.1621926079193751, "train_probe_signal/brier_reward/group_std_mean": 0.21996241311232248, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07256464473903179, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.016219260947157938, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04783617208401362, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.07148952161272366, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021304875301818054, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004783617487798135, "train_probe_signal/format_reward/centered_abs_mean": 0.013346354011446238, "train_probe_signal/format_reward/group_std_mean": 0.0362943010404706, "train_probe_signal/format_reward/group_zero_std_frac": 0.8055555721124014, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.029005679301917553, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.006673177005723119, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2803582151730855, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.3964219441016515, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.017947336037953694, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004009122572218378, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2803582151730855, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.3964219441016515, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.017947336037953694, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004009122572218378, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.10028981169064839, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.1465649058421453, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006421741874267657, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001434144234129538, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1032271757721901, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.12874728937943777, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0066018542274832726, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014761485702668626, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.17853716760873795, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.21422516802946726, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011413250584155321, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025530814891681075, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.26261725028355914, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.3138073782126109, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01678881049156189, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0037554265776028237, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2796053687731425, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3954645246267319, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01789912985016902, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00399835667728136, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.013346354011446238, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0362943010404706, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8055555721124014, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.005801136062170069, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0013346354438302417, "train_probe_steps_per_second": 0.032 }, { "calibration/aurc": 0.1786189017935566, "calibration/batch_distribution_entropy": 0.9565413659875921, "calibration/buffer_distribution_entropy": 0.9837801817834313, "calibration/confidence_entropy": 0.47072229513495667, "calibration/coverage@0%": 0.03604247404982252, "calibration/coverage@1%": 0.09677545834301624, "calibration/coverage@10%": 0.4448384934657499, "calibration/coverage@15%": 0.501737361819292, "calibration/coverage@20%": 0.590501130051627, "calibration/coverage@25%": 0.6458115752828546, "calibration/coverage@30%": 0.8118051022628372, "calibration/coverage@5%": 0.22979792780200406, "calibration/ece": 0.13804144620935982, "calibration/mean_confidence": 0.5963494289340041, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003038194444444464, "completions/max_length": 3849.0, "completions/max_terminated_length": 3849.0, "completions/mean_length": 941.025, "completions/mean_terminated_length": 944.0136962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 184.0, "epoch": 1.0935988300146249, "grad_norm": 0.0026187740731984377, "learning_rate": 3.5156250000000003e-06, "loss": -0.0019, "num_tokens": 1179246659.0, "reward": 1.0269762516021728, "reward_std": 0.11533690541982651, "rewards/accuracy_reward": 0.7481770753860474, "rewards/brier_reward": 0.8202757239341736, "rewards/confidence_uniqueness_reward": 0.9461743950843811, "rewards/format_reward": 0.9964409708976746, "rewards/frontier_coverage_0": -0.004797273135045544, "rewards/frontier_coverage_1": -0.004797273135045544, "rewards/frontier_coverage_10": 0.034060157090425494, "rewards/frontier_coverage_15": 0.09921480715274811, "rewards/frontier_coverage_20": 0.17450326085090637, "rewards/frontier_coverage_25": 0.2625850081443787, "rewards/frontier_coverage_5": -0.0045932690671179445, "rewards/frontier_entropy_batch_reward": -0.29931144416332245, "signal/accuracy_reward/centered_abs_mean": 0.140565325319767, "signal/accuracy_reward/group_std_mean": 0.18419778048992158, "signal/accuracy_reward/group_zero_std_frac": 0.4777777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0589641332626343, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0702826626598835, "signal/advantage_abs_mean": 0.7629113554954529, "signal/advantage_pre_scale_abs_mean": 0.08662729263305664, "signal/advantage_pre_scale_std": 0.13903791308403016, "signal/advantage_std": 0.9829711079597473, "signal/brier_reward/centered_abs_mean": 0.1235265538096428, "signal/brier_reward/group_std_mean": 0.1592309892177582, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18637319803237914, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0123526556417346, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02009183168411255, "signal/confidence_uniqueness_reward/group_std_mean": 0.033618181198835376, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030547019839286805, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020091832615435124, "signal/format_reward/centered_abs_mean": 0.006787109328433872, "signal/format_reward/group_std_mean": 0.01738979984074831, "signal/format_reward/group_zero_std_frac": 0.9111111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05055982656776905, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003393554664216936, "signal/frontier_coverage_0/centered_abs_mean": 0.17253205180168152, "signal/frontier_coverage_0/group_std_mean": 0.22136968672275542, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03716257512569428, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002467208169400692, "signal/frontier_coverage_1/centered_abs_mean": 0.17253205180168152, "signal/frontier_coverage_1/group_std_mean": 0.22136968672275542, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03716257512569428, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002467208169400692, "signal/frontier_coverage_10/centered_abs_mean": 0.07255163341760636, "signal/frontier_coverage_10/group_std_mean": 0.09273735284805298, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01566624455153942, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010374883189797402, "signal/frontier_coverage_15/centered_abs_mean": 0.07906774580478668, "signal/frontier_coverage_15/group_std_mean": 0.09843083024024964, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017194531671702862, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011306687723845244, "signal/frontier_coverage_20/centered_abs_mean": 0.11041324287652969, "signal/frontier_coverage_20/group_std_mean": 0.13848029375076293, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024011900275945665, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00157890934497118, "signal/frontier_coverage_25/centered_abs_mean": 0.15008221864700316, "signal/frontier_coverage_25/group_std_mean": 0.18917769193649292, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03261282928287983, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021461757365614175, "signal/frontier_coverage_5/centered_abs_mean": 0.17207085490226745, "signal/frontier_coverage_5/group_std_mean": 0.22079600393772125, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03706258684396744, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002460613241419196, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3263821184635162, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3926126003265381, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49657478332519533, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326382115483284, "step": 455 }, { "calibration/aurc": 0.13182774360565988, "calibration/batch_distribution_entropy": 0.9525344528071358, "calibration/buffer_distribution_entropy": 0.9834768704905992, "calibration/confidence_entropy": 0.5074257563517042, "calibration/coverage@0%": 0.03441579634464752, "calibration/coverage@1%": 0.03441579634464752, "calibration/coverage@10%": 0.4196420800696258, "calibration/coverage@15%": 0.6088677654482157, "calibration/coverage@20%": 0.8418706483899042, "calibration/coverage@25%": 0.922715404699739, "calibration/coverage@30%": 0.9577023498694517, "calibration/coverage@5%": 0.2622307441253264, "calibration/ece": 0.14650989358297242, "calibration/mean_confidence": 0.5997194100397429, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001388888888888906, "completions/max_length": 3680.8, "completions/max_terminated_length": 3680.8, "completions/mean_length": 817.7682495117188, "completions/mean_terminated_length": 818.874755859375, "completions/min_length": 0.0, "completions/min_terminated_length": 154.4, "epoch": 1.1055986800164999, "grad_norm": 0.0029210939537733793, "learning_rate": 3.4855769230769233e-06, "loss": 0.0012, "num_tokens": 1191737141.0, "reward": 1.0027261614799499, "reward_std": 0.10649179220199585, "rewards/accuracy_reward": 0.69921875, "rewards/brier_reward": 0.8265803694725037, "rewards/confidence_uniqueness_reward": 0.946752381324768, "rewards/format_reward": 0.998524296283722, "rewards/frontier_coverage_0": 0.02881563175469637, "rewards/frontier_coverage_1": 0.02881563175469637, "rewards/frontier_coverage_10": 0.04026442915201187, "rewards/frontier_coverage_15": 0.09344350546598434, "rewards/frontier_coverage_20": 0.16026363670825958, "rewards/frontier_coverage_25": 0.23922086954116822, "rewards/frontier_coverage_5": 0.028822965174913406, "rewards/frontier_entropy_batch_reward": -0.32339624464511874, "signal/accuracy_reward/centered_abs_mean": 0.13109266459941865, "signal/accuracy_reward/group_std_mean": 0.17193427979946135, "signal/accuracy_reward/group_zero_std_frac": 0.5111111164093017, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0414057493209838, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06554633229970933, "signal/advantage_abs_mean": 0.7716199159622192, "signal/advantage_pre_scale_abs_mean": 0.08222170770168305, "signal/advantage_pre_scale_std": 0.12968189120292664, "signal/advantage_std": 0.9828980565071106, "signal/brier_reward/centered_abs_mean": 0.11129124760627747, "signal/brier_reward/group_std_mean": 0.14414749443531036, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17665610015392302, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011129124835133553, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017181032337248325, "signal/confidence_uniqueness_reward/group_std_mean": 0.024586594104766844, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027339120209217072, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017181032570078969, "signal/format_reward/centered_abs_mean": 0.002739800279960036, "signal/format_reward/group_std_mean": 0.006618343479931355, "signal/format_reward/group_zero_std_frac": 0.9666666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.020953606348484755, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001369900139980018, "signal/frontier_coverage_0/centered_abs_mean": 0.15755559802055358, "signal/frontier_coverage_0/group_std_mean": 0.20323067009449006, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035665206611156464, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002253045095130801, "signal/frontier_coverage_1/centered_abs_mean": 0.15755559802055358, "signal/frontier_coverage_1/group_std_mean": 0.20323067009449006, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035665206611156464, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002253045095130801, "signal/frontier_coverage_10/centered_abs_mean": 0.06791285276412964, "signal/frontier_coverage_10/group_std_mean": 0.08749785423278808, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015421891212463379, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009711537742987276, "signal/frontier_coverage_15/centered_abs_mean": 0.07548021227121353, "signal/frontier_coverage_15/group_std_mean": 0.09430369436740875, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017251455783843996, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001079367089550942, "signal/frontier_coverage_20/centered_abs_mean": 0.10724329799413682, "signal/frontier_coverage_20/group_std_mean": 0.13464588522911072, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024510875716805457, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015335792209953069, "signal/frontier_coverage_25/centered_abs_mean": 0.14685104191303253, "signal/frontier_coverage_25/group_std_mean": 0.18480223715305327, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03354732654988766, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020999698666855694, "signal/frontier_coverage_5/centered_abs_mean": 0.1571534216403961, "signal/frontier_coverage_5/group_std_mean": 0.2027212381362915, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035574134439229965, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022472939221188427, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3332145571708679, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40059667229652407, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5312770664691925, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03332145735621452, "step": 460 }, { "calibration/aurc": 0.11050655459715138, "calibration/batch_distribution_entropy": 0.9616996588856601, "calibration/buffer_distribution_entropy": 0.9835301818257198, "calibration/confidence_entropy": 0.4803941482968095, "calibration/coverage@0%": 0.06412097476066145, "calibration/coverage@1%": 0.06412097476066145, "calibration/coverage@10%": 0.5430415034812881, "calibration/coverage@15%": 0.781678361618799, "calibration/coverage@20%": 0.8921303851174935, "calibration/coverage@25%": 0.952581048738033, "calibration/coverage@30%": 0.98125, "calibration/coverage@5%": 0.22615181679721497, "calibration/ece": 0.19289299100357954, "calibration/mean_confidence": 0.5811207258908004, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0015625, "completions/max_length": 3707.6, "completions/max_terminated_length": 3707.6, "completions/mean_length": 763.6137451171875, "completions/mean_terminated_length": 764.8095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 146.2, "epoch": 1.1175985300183748, "grad_norm": 0.0028260101098567247, "learning_rate": 3.4555288461538466e-06, "loss": 0.0146, "num_tokens": 1203618675.0, "reward": 1.0120792388916016, "reward_std": 0.10953928977251053, "rewards/accuracy_reward": 0.7052951455116272, "rewards/brier_reward": 0.8180999517440796, "rewards/confidence_uniqueness_reward": 0.9513657927513123, "rewards/format_reward": 0.9983507037162781, "rewards/frontier_coverage_0": 0.026010525721358136, "rewards/frontier_coverage_1": 0.026010525721358136, "rewards/frontier_coverage_10": 0.039709169417619705, "rewards/frontier_coverage_15": 0.08716795891523361, "rewards/frontier_coverage_20": 0.14967485666275024, "rewards/frontier_coverage_25": 0.2271842062473297, "rewards/frontier_coverage_5": 0.02602526988484897, "rewards/frontier_entropy_batch_reward": -0.2500975012779236, "signal/accuracy_reward/centered_abs_mean": 0.14068467617034913, "signal/accuracy_reward/group_std_mean": 0.18849452435970307, "signal/accuracy_reward/group_zero_std_frac": 0.4611111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0389271974563599, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07034233808517457, "signal/advantage_abs_mean": 0.75585458278656, "signal/advantage_pre_scale_abs_mean": 0.08214503675699233, "signal/advantage_pre_scale_std": 0.12915118932723998, "signal/advantage_std": 0.9830074667930603, "signal/brier_reward/centered_abs_mean": 0.12185637354850769, "signal/brier_reward/group_std_mean": 0.15803824067115785, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1799175798892975, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012185638025403022, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015415123663842678, "signal/confidence_uniqueness_reward/group_std_mean": 0.023467693105340005, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022776027396321296, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015415124129503966, "signal/format_reward/centered_abs_mean": 0.003152126632630825, "signal/format_reward/group_std_mean": 0.008380424790084362, "signal/format_reward/group_zero_std_frac": 0.955555546283722, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.023016710579395295, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0015760633163154126, "signal/frontier_coverage_0/centered_abs_mean": 0.18920553624629974, "signal/frontier_coverage_0/group_std_mean": 0.24695340692996978, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0399616576731205, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0027056390419602392, "signal/frontier_coverage_1/centered_abs_mean": 0.18920553624629974, "signal/frontier_coverage_1/group_std_mean": 0.24695340692996978, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0399616576731205, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0027056390419602392, "signal/frontier_coverage_10/centered_abs_mean": 0.07501264661550522, "signal/frontier_coverage_10/group_std_mean": 0.09830510169267655, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01587317083030939, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010726808570325375, "signal/frontier_coverage_15/centered_abs_mean": 0.07189750969409943, "signal/frontier_coverage_15/group_std_mean": 0.09009484201669693, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015242060646414757, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001028134406078607, "signal/frontier_coverage_20/centered_abs_mean": 0.09858875572681428, "signal/frontier_coverage_20/group_std_mean": 0.12397728711366654, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020897452905774116, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014098191168159246, "signal/frontier_coverage_25/centered_abs_mean": 0.13496056348085403, "signal/frontier_coverage_25/group_std_mean": 0.17078811824321746, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02859700210392475, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019299360923469067, "signal/frontier_coverage_5/centered_abs_mean": 0.1886357218027115, "signal/frontier_coverage_5/group_std_mean": 0.24621900022029877, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03984150066971779, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0026974908541888, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31103195548057555, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38082465529441833, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4604890525341034, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031103195250034334, "step": 465 }, { "calibration/aurc": 0.08517079158054916, "calibration/batch_distribution_entropy": 0.9373243595334273, "calibration/buffer_distribution_entropy": 0.9824645940877273, "calibration/confidence_entropy": 0.46250007206138244, "calibration/coverage@0%": 0.07691184312584835, "calibration/coverage@1%": 0.154516009792515, "calibration/coverage@10%": 0.6831409803595667, "calibration/coverage@15%": 0.7912037037037039, "calibration/coverage@20%": 0.8361937830687831, "calibration/coverage@25%": 0.9244378306878307, "calibration/coverage@30%": 0.9708333333333332, "calibration/coverage@5%": 0.577076970622455, "calibration/ece": 0.18084958862304013, "calibration/mean_confidence": 0.5971060114193779, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002951388888888862, "completions/max_length": 3980.8, "completions/max_terminated_length": 3980.8, "completions/mean_length": 772.4866333007812, "completions/mean_terminated_length": 774.7864501953125, "completions/min_length": 0.0, "completions/min_terminated_length": 156.6, "epoch": 1.1295983800202498, "grad_norm": 0.0026523873675614595, "learning_rate": 3.4254807692307695e-06, "loss": -0.0018, "num_tokens": 1215594809.0, "reward": 1.0093950271606444, "reward_std": 0.10678299516439438, "rewards/accuracy_reward": 0.715624988079071, "rewards/brier_reward": 0.820962381362915, "rewards/confidence_uniqueness_reward": 0.9453279972076416, "rewards/format_reward": 0.9969618082046509, "rewards/frontier_coverage_0": 0.017951905727386475, "rewards/frontier_coverage_1": 0.017951905727386475, "rewards/frontier_coverage_10": 0.03737656474113464, "rewards/frontier_coverage_15": 0.09893600046634674, "rewards/frontier_coverage_20": 0.17084673941135406, "rewards/frontier_coverage_25": 0.25529789328575136, "rewards/frontier_coverage_5": 0.018055624887347223, "rewards/frontier_entropy_batch_reward": -0.32342151999473573, "signal/accuracy_reward/centered_abs_mean": 0.12125650942325591, "signal/accuracy_reward/group_std_mean": 0.16661165952682494, "signal/accuracy_reward/group_zero_std_frac": 0.5027777969837188, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9284161925315857, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06062825471162796, "signal/advantage_abs_mean": 0.755516505241394, "signal/advantage_pre_scale_abs_mean": 0.07899446189403533, "signal/advantage_pre_scale_std": 0.12964089810848237, "signal/advantage_std": 0.9829226613044739, "signal/brier_reward/centered_abs_mean": 0.12145691961050034, "signal/brier_reward/group_std_mean": 0.15792261064052582, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.189512637257576, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012145692296326161, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019829026609659194, "signal/confidence_uniqueness_reward/group_std_mean": 0.029715277999639512, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03138362094759941, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019829027354717256, "signal/format_reward/centered_abs_mean": 0.005626085074618458, "signal/format_reward/group_std_mean": 0.012826384603977203, "signal/format_reward/group_zero_std_frac": 0.9388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.045294156298041344, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002813042537309229, "signal/frontier_coverage_0/centered_abs_mean": 0.161966073513031, "signal/frontier_coverage_0/group_std_mean": 0.21199508607387543, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036105792969465256, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00231611467897892, "signal/frontier_coverage_1/centered_abs_mean": 0.161966073513031, "signal/frontier_coverage_1/group_std_mean": 0.21199508607387543, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036105792969465256, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00231611467897892, "signal/frontier_coverage_10/centered_abs_mean": 0.06864747554063796, "signal/frontier_coverage_10/group_std_mean": 0.08801006525754929, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015342991799116135, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009816589066758753, "signal/frontier_coverage_15/centered_abs_mean": 0.08157427757978439, "signal/frontier_coverage_15/group_std_mean": 0.10169614106416702, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018237525969743727, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001166512188501656, "signal/frontier_coverage_20/centered_abs_mean": 0.1129850059747696, "signal/frontier_coverage_20/group_std_mean": 0.142295703291893, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025199725478887557, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016156855970621109, "signal/frontier_coverage_25/centered_abs_mean": 0.1523255378007889, "signal/frontier_coverage_25/group_std_mean": 0.193336421251297, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0339178204536438, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021782551892101766, "signal/frontier_coverage_5/centered_abs_mean": 0.16131708920001983, "signal/frontier_coverage_5/group_std_mean": 0.2111732006072998, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0359614685177803, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002306834328919649, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3393139183521271, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40554860830307005, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5308336019515991, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033931391686201094, "step": 470 }, { "calibration/aurc": 0.12207177828518079, "calibration/batch_distribution_entropy": 0.9656969774722203, "calibration/buffer_distribution_entropy": 0.9819994989830988, "calibration/confidence_entropy": 0.4727467589110737, "calibration/coverage@0%": 0.12662435359506646, "calibration/coverage@1%": 0.16504585897141055, "calibration/coverage@10%": 0.5823363220828475, "calibration/coverage@15%": 0.6966582552676553, "calibration/coverage@20%": 0.8079826809515552, "calibration/coverage@25%": 0.8561049324882888, "calibration/coverage@30%": 0.8983227702764767, "calibration/coverage@5%": 0.33990680647612914, "calibration/ece": 0.16863616885602545, "calibration/mean_confidence": 0.5226219328072567, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0071180555555555355, "completions/max_length": 4025.6, "completions/max_terminated_length": 4025.6, "completions/mean_length": 857.51484375, "completions/mean_terminated_length": 863.591796875, "completions/min_length": 0.0, "completions/min_terminated_length": 159.8, "epoch": 1.1415982300221248, "grad_norm": 0.0022798883728682995, "learning_rate": 3.3954326923076925e-06, "loss": -0.0073, "num_tokens": 1228563108.0, "reward": 1.0127497553825378, "reward_std": 0.11343308985233307, "rewards/accuracy_reward": 0.7190104126930237, "rewards/brier_reward": 0.8156786322593689, "rewards/confidence_uniqueness_reward": 0.9434163570404053, "rewards/format_reward": 0.9927083134651185, "rewards/frontier_coverage_0": 0.020774408336728813, "rewards/frontier_coverage_1": 0.020774408336728813, "rewards/frontier_coverage_10": 0.04323679804801941, "rewards/frontier_coverage_15": 0.09686812907457351, "rewards/frontier_coverage_20": 0.16587933003902436, "rewards/frontier_coverage_25": 0.24957230389118196, "rewards/frontier_coverage_5": 0.020920474920421837, "rewards/frontier_entropy_batch_reward": -0.2785690575838089, "signal/accuracy_reward/centered_abs_mean": 0.13173285275697708, "signal/accuracy_reward/group_std_mean": 0.17883577346801757, "signal/accuracy_reward/group_zero_std_frac": 0.4694444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9889632225036621, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06586642637848854, "signal/advantage_abs_mean": 0.7479575753211976, "signal/advantage_pre_scale_abs_mean": 0.08370956778526306, "signal/advantage_pre_scale_std": 0.14035601019859315, "signal/advantage_std": 0.9829661011695862, "signal/brier_reward/centered_abs_mean": 0.12453215271234512, "signal/brier_reward/group_std_mean": 0.16238663494586944, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1885141134262085, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012453215941786767, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025474615022540094, "signal/confidence_uniqueness_reward/group_std_mean": 0.03968836776912212, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03929465599358082, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002547461586073041, "signal/format_reward/centered_abs_mean": 0.012565104104578496, "signal/format_reward/group_std_mean": 0.024067432433366776, "signal/format_reward/group_zero_std_frac": 0.9000000119209289, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0986025169491768, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006282552052289248, "signal/frontier_coverage_0/centered_abs_mean": 0.1802436351776123, "signal/frontier_coverage_0/group_std_mean": 0.2356257289648056, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03904039040207863, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025774840265512466, "signal/frontier_coverage_1/centered_abs_mean": 0.1802436351776123, "signal/frontier_coverage_1/group_std_mean": 0.2356257289648056, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03904039040207863, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025774840265512466, "signal/frontier_coverage_10/centered_abs_mean": 0.0703365221619606, "signal/frontier_coverage_10/group_std_mean": 0.09053354859352111, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015251378715038299, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010058123152703047, "signal/frontier_coverage_15/centered_abs_mean": 0.07596449106931687, "signal/frontier_coverage_15/group_std_mean": 0.0946640431880951, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01646037306636572, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010862921830266715, "signal/frontier_coverage_20/centered_abs_mean": 0.10371304005384445, "signal/frontier_coverage_20/group_std_mean": 0.13013996928930283, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02243281565606594, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014830964850261808, "signal/frontier_coverage_25/centered_abs_mean": 0.1406008318066597, "signal/frontier_coverage_25/group_std_mean": 0.17746139764785768, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030378331989049913, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020105918403714894, "signal/frontier_coverage_5/centered_abs_mean": 0.1796477258205414, "signal/frontier_coverage_5/group_std_mean": 0.23484897315502168, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03891072869300842, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00256896261125803, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32625203132629393, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39263423085212706, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49508474469184877, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03262520357966423, "step": 475 }, { "calibration/aurc": 0.11996723751576499, "calibration/batch_distribution_entropy": 0.9502936109742859, "calibration/buffer_distribution_entropy": 0.9814678499485566, "calibration/confidence_entropy": 0.5001467013815555, "calibration/coverage@0%": 0.11838897543173983, "calibration/coverage@1%": 0.12100677647886027, "calibration/coverage@10%": 0.47935503126530066, "calibration/coverage@15%": 0.6854775724880053, "calibration/coverage@20%": 0.7944413968719853, "calibration/coverage@25%": 0.8744504925528744, "calibration/coverage@30%": 0.9459050474414429, "calibration/coverage@5%": 0.3548078656381403, "calibration/ece": 0.1331829238516709, "calibration/mean_confidence": 0.6054101633020542, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005468750000000022, "completions/max_length": 3896.8, "completions/max_terminated_length": 3896.8, "completions/mean_length": 770.0582641601562, "completions/mean_terminated_length": 774.343505859375, "completions/min_length": 0.0, "completions/min_terminated_length": 154.0, "epoch": 1.1535980800239998, "grad_norm": 0.002632453106343746, "learning_rate": 3.365384615384616e-06, "loss": -0.0152, "num_tokens": 1240526755.0, "reward": 1.0191356778144836, "reward_std": 0.10840296298265457, "rewards/accuracy_reward": 0.7357638955116272, "rewards/brier_reward": 0.8275517463684082, "rewards/confidence_uniqueness_reward": 0.9431678771972656, "rewards/format_reward": 0.994531261920929, "rewards/frontier_coverage_0": 0.009372111305128782, "rewards/frontier_coverage_1": 0.009372111305128782, "rewards/frontier_coverage_10": 0.03894369155168533, "rewards/frontier_coverage_15": 0.10267277508974075, "rewards/frontier_coverage_20": 0.1778305560350418, "rewards/frontier_coverage_25": 0.2657860189676285, "rewards/frontier_coverage_5": 0.009487632277887315, "rewards/frontier_entropy_batch_reward": -0.31856379210948943, "signal/accuracy_reward/centered_abs_mean": 0.11809895783662797, "signal/accuracy_reward/group_std_mean": 0.1618511974811554, "signal/accuracy_reward/group_zero_std_frac": 0.5194444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9492631077766418, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05904947891831398, "signal/advantage_abs_mean": 0.7568408727645874, "signal/advantage_pre_scale_abs_mean": 0.07987073361873627, "signal/advantage_pre_scale_std": 0.1343359723687172, "signal/advantage_std": 0.9828636050224304, "signal/brier_reward/centered_abs_mean": 0.11183222085237503, "signal/brier_reward/group_std_mean": 0.1446450471878052, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18134056627750397, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011183222196996212, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023035935312509536, "signal/confidence_uniqueness_reward/group_std_mean": 0.035484759509563445, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03718518950045109, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023035936523228885, "signal/format_reward/centered_abs_mean": 0.009879557276144623, "signal/format_reward/group_std_mean": 0.01944063398987055, "signal/format_reward/group_zero_std_frac": 0.919444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07821155041456222, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004939778638072312, "signal/frontier_coverage_0/centered_abs_mean": 0.14692335128784179, "signal/frontier_coverage_0/group_std_mean": 0.1917490392923355, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033961694315075876, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002101003797724843, "signal/frontier_coverage_1/centered_abs_mean": 0.14692335128784179, "signal/frontier_coverage_1/group_std_mean": 0.1917490392923355, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033961694315075876, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002101003797724843, "signal/frontier_coverage_10/centered_abs_mean": 0.05811228826642036, "signal/frontier_coverage_10/group_std_mean": 0.07442953586578369, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013504561595618724, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000831005722284317, "signal/frontier_coverage_15/centered_abs_mean": 0.07867266237735748, "signal/frontier_coverage_15/group_std_mean": 0.09794508963823319, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0183884521946311, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011250190436840057, "signal/frontier_coverage_20/centered_abs_mean": 0.11196665018796921, "signal/frontier_coverage_20/group_std_mean": 0.14016874134540558, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02614797055721283, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016011230880394578, "signal/frontier_coverage_25/centered_abs_mean": 0.15231850743293762, "signal/frontier_coverage_25/group_std_mean": 0.19153027832508088, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03552020974457264, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021781546529382467, "signal/frontier_coverage_5/centered_abs_mean": 0.14627367556095122, "signal/frontier_coverage_5/group_std_mean": 0.19091903269290925, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033811989799141885, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002091713552363217, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3317020297050476, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39510130882263184, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5416593670845031, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03317020274698734, "step": 480 }, { "calibration/aurc": 0.18564510118578065, "calibration/batch_distribution_entropy": 0.9486334244046699, "calibration/buffer_distribution_entropy": 0.9805598930051833, "calibration/confidence_entropy": 0.49161663712729864, "calibration/coverage@0%": 0.006831564361775608, "calibration/coverage@1%": 0.006831564361775608, "calibration/coverage@10%": 0.2141053832594134, "calibration/coverage@15%": 0.3798645940429059, "calibration/coverage@20%": 0.6337962401133928, "calibration/coverage@25%": 0.7658161318860899, "calibration/coverage@30%": 0.9063885631317362, "calibration/coverage@5%": 0.1823190643617756, "calibration/ece": 0.1538997399439367, "calibration/mean_confidence": 0.6057656998083434, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008246527777777768, "completions/max_length": 3902.6, "completions/max_terminated_length": 3902.6, "completions/mean_length": 812.4036499023438, "completions/mean_terminated_length": 819.348095703125, "completions/min_length": 0.0, "completions/min_terminated_length": 139.4, "epoch": 1.1655979300258748, "grad_norm": 0.0023592854849994183, "learning_rate": 3.3353365384615388e-06, "loss": -0.0169, "num_tokens": 1253001261.0, "reward": 1.0053442597389222, "reward_std": 0.12032625824213028, "rewards/accuracy_reward": 0.7107639074325561, "rewards/brier_reward": 0.810713255405426, "rewards/confidence_uniqueness_reward": 0.9426342844963074, "rewards/format_reward": 0.9916666626930237, "rewards/frontier_coverage_0": 0.010230178479105234, "rewards/frontier_coverage_1": 0.010230178479105234, "rewards/frontier_coverage_10": 0.0376857940107584, "rewards/frontier_coverage_15": 0.09146715253591538, "rewards/frontier_coverage_20": 0.15871667861938477, "rewards/frontier_coverage_25": 0.23904962837696075, "rewards/frontier_coverage_5": 0.010379027342423797, "rewards/frontier_entropy_batch_reward": -0.29181754887104033, "signal/accuracy_reward/centered_abs_mean": 0.13616536259651185, "signal/accuracy_reward/group_std_mean": 0.18218682706356049, "signal/accuracy_reward/group_zero_std_frac": 0.4750000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9551702618598938, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06808268129825593, "signal/advantage_abs_mean": 0.7498261570930481, "signal/advantage_pre_scale_abs_mean": 0.08922160565853118, "signal/advantage_pre_scale_std": 0.14570856988430023, "signal/advantage_std": 0.983072292804718, "signal/brier_reward/centered_abs_mean": 0.12257575690746307, "signal/brier_reward/group_std_mean": 0.1598212093114853, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1727291464805603, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012257575429975987, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02620217837393284, "signal/confidence_uniqueness_reward/group_std_mean": 0.04137233719229698, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03676874563097954, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026202178793027996, "signal/format_reward/centered_abs_mean": 0.014735243190079928, "signal/format_reward/group_std_mean": 0.027417659759521484, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10229784548282624, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007367621595039964, "signal/frontier_coverage_0/centered_abs_mean": 0.16911786496639253, "signal/frontier_coverage_0/group_std_mean": 0.2203467756509781, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03399922624230385, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024183853529393673, "signal/frontier_coverage_1/centered_abs_mean": 0.16911786496639253, "signal/frontier_coverage_1/group_std_mean": 0.2203467756509781, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03399922624230385, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024183853529393673, "signal/frontier_coverage_10/centered_abs_mean": 0.06196560263633728, "signal/frontier_coverage_10/group_std_mean": 0.08008685559034348, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012463575229048729, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000886108132544905, "signal/frontier_coverage_15/centered_abs_mean": 0.07783405929803848, "signal/frontier_coverage_15/group_std_mean": 0.09741163402795791, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015766285918653013, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011130270315334201, "signal/frontier_coverage_20/centered_abs_mean": 0.11139850169420243, "signal/frontier_coverage_20/group_std_mean": 0.14017115235328675, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022583086416125296, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015929985558614135, "signal/frontier_coverage_25/centered_abs_mean": 0.15247215330600739, "signal/frontier_coverage_25/group_std_mean": 0.192574143409729, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030896326154470445, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002180351689457893, "signal/frontier_coverage_5/centered_abs_mean": 0.1685707986354828, "signal/frontier_coverage_5/group_std_mean": 0.21966981887817383, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03388794735074043, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002410562336444855, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32269885540008547, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39061746597290037, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45797826647758483, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03226988650858402, "step": 485 }, { "calibration/aurc": 0.12209578498722051, "calibration/batch_distribution_entropy": 0.958503429934321, "calibration/buffer_distribution_entropy": 0.9797430580609839, "calibration/confidence_entropy": 0.48254682389366027, "calibration/coverage@0%": 0.027819554748278075, "calibration/coverage@1%": 0.027819554748278075, "calibration/coverage@10%": 0.5379635313909457, "calibration/coverage@15%": 0.6975057368759578, "calibration/coverage@20%": 0.8211241707650437, "calibration/coverage@25%": 0.9300641786772388, "calibration/coverage@30%": 0.984251968503937, "calibration/coverage@5%": 0.25911881534772097, "calibration/ece": 0.1608791440963225, "calibration/mean_confidence": 0.5888582159462711, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555557, "completions/max_length": 4010.4, "completions/max_terminated_length": 4010.4, "completions/mean_length": 828.8147705078125, "completions/mean_terminated_length": 837.3738647460938, "completions/min_length": 0.0, "completions/min_terminated_length": 152.0, "epoch": 1.1775977800277497, "grad_norm": 0.002430199645459652, "learning_rate": 3.3052884615384617e-06, "loss": -0.0208, "num_tokens": 1265643895.0, "reward": 1.008857047557831, "reward_std": 0.12614489495754241, "rewards/accuracy_reward": 0.7142361044883728, "rewards/brier_reward": 0.8259082913398743, "rewards/confidence_uniqueness_reward": 0.9401936411857605, "rewards/format_reward": 0.989756953716278, "rewards/frontier_coverage_0": 0.02575213145464659, "rewards/frontier_coverage_1": 0.02575213145464659, "rewards/frontier_coverage_10": 0.04685047268867493, "rewards/frontier_coverage_15": 0.10534610897302628, "rewards/frontier_coverage_20": 0.17943286299705505, "rewards/frontier_coverage_25": 0.26633910536766053, "rewards/frontier_coverage_5": 0.025928001292049883, "rewards/frontier_entropy_batch_reward": -0.2940792411565781, "signal/accuracy_reward/centered_abs_mean": 0.1511501759290695, "signal/accuracy_reward/group_std_mean": 0.19605484008789062, "signal/accuracy_reward/group_zero_std_frac": 0.45, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.1121771931648254, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07557508796453476, "signal/advantage_abs_mean": 0.7582149028778076, "signal/advantage_pre_scale_abs_mean": 0.09528509080410004, "signal/advantage_pre_scale_std": 0.15467472672462462, "signal/advantage_std": 0.983012342453003, "signal/brier_reward/centered_abs_mean": 0.12473153322935104, "signal/brier_reward/group_std_mean": 0.16199083328247071, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18308565616607667, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012473153136670589, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.028536709398031233, "signal/confidence_uniqueness_reward/group_std_mean": 0.045708222687244414, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04193682223558426, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0028536709025502203, "signal/format_reward/centered_abs_mean": 0.016503906436264516, "signal/format_reward/group_std_mean": 0.031104812026023866, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12030726373195648, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008251953218132258, "signal/frontier_coverage_0/centered_abs_mean": 0.17551998496055604, "signal/frontier_coverage_0/group_std_mean": 0.22594922184944152, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03666192330420017, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025099357357248665, "signal/frontier_coverage_1/centered_abs_mean": 0.17551998496055604, "signal/frontier_coverage_1/group_std_mean": 0.22594922184944152, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03666192330420017, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025099357357248665, "signal/frontier_coverage_10/centered_abs_mean": 0.06443373411893845, "signal/frontier_coverage_10/group_std_mean": 0.08207622617483139, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013494310528039932, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000921402417588979, "signal/frontier_coverage_15/centered_abs_mean": 0.08036363422870636, "signal/frontier_coverage_15/group_std_mean": 0.10059184283018112, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017017839662730693, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011491999262943863, "signal/frontier_coverage_20/centered_abs_mean": 0.1145074725151062, "signal/frontier_coverage_20/group_std_mean": 0.14463033080101012, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02431431822478771, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001637456798925996, "signal/frontier_coverage_25/centered_abs_mean": 0.15715709924697877, "signal/frontier_coverage_25/group_std_mean": 0.1994520455598831, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03337772414088249, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002247346518561244, "signal/frontier_coverage_5/centered_abs_mean": 0.1747252196073532, "signal/frontier_coverage_5/group_std_mean": 0.22497124075889588, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036491810157895085, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002498570643365383, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3236543297767639, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39115352034568784, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4786272764205933, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03236543461680412, "step": 490 }, { "calibration/aurc": 0.15002666997967945, "calibration/batch_distribution_entropy": 0.9103722554100419, "calibration/buffer_distribution_entropy": 0.979845427603407, "calibration/confidence_entropy": 0.4612177545633805, "calibration/coverage@0%": 0.05277696335234473, "calibration/coverage@1%": 0.05277696335234473, "calibration/coverage@10%": 0.45088894750812303, "calibration/coverage@15%": 0.5895088163308796, "calibration/coverage@20%": 0.7311479303775578, "calibration/coverage@25%": 0.8568830252575138, "calibration/coverage@30%": 0.8994821504162551, "calibration/coverage@5%": 0.30198401792809815, "calibration/ece": 0.12433344289083512, "calibration/mean_confidence": 0.6259431202668907, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014496527777777768, "completions/max_length": 4034.0, "completions/max_terminated_length": 4034.0, "completions/mean_length": 829.9917602539062, "completions/mean_terminated_length": 842.1129760742188, "completions/min_length": 0.0, "completions/min_terminated_length": 151.2, "epoch": 1.1895976300296247, "grad_norm": 0.003350967774167657, "learning_rate": 3.2752403846153846e-06, "loss": -0.0402, "num_tokens": 1278284440.0, "reward": 0.997491466999054, "reward_std": 0.1228562831878662, "rewards/accuracy_reward": 0.705555546283722, "rewards/brier_reward": 0.829783308506012, "rewards/confidence_uniqueness_reward": 0.932075309753418, "rewards/format_reward": 0.9855034708976745, "rewards/frontier_coverage_0": 0.035646550729870795, "rewards/frontier_coverage_1": 0.035646550729870795, "rewards/frontier_coverage_10": 0.050268112868070605, "rewards/frontier_coverage_15": 0.11117468625307084, "rewards/frontier_coverage_20": 0.18869028091430665, "rewards/frontier_coverage_25": 0.2782452583312988, "rewards/frontier_coverage_5": 0.03579212427139282, "rewards/frontier_entropy_batch_reward": -0.34741051197052003, "signal/accuracy_reward/centered_abs_mean": 0.12520616501569748, "signal/accuracy_reward/group_std_mean": 0.17152535617351533, "signal/accuracy_reward/group_zero_std_frac": 0.49166667461395264, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9340617418289184, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06260308250784874, "signal/advantage_abs_mean": 0.7470793724060059, "signal/advantage_pre_scale_abs_mean": 0.08969731330871582, "signal/advantage_pre_scale_std": 0.15365103781223297, "signal/advantage_std": 0.9829949975013733, "signal/brier_reward/centered_abs_mean": 0.11885639429092407, "signal/brier_reward/group_std_mean": 0.15579778254032134, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17715712189674376, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01188563983887434, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036046646907925604, "signal/confidence_uniqueness_reward/group_std_mean": 0.05560135096311569, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05372623428702354, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0036046647001057863, "signal/format_reward/centered_abs_mean": 0.02235785610973835, "signal/format_reward/group_std_mean": 0.03920154646039009, "signal/format_reward/group_zero_std_frac": 0.8472222089767456, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.16617890894412995, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011178928054869175, "signal/frontier_coverage_0/centered_abs_mean": 0.14919237792491913, "signal/frontier_coverage_0/group_std_mean": 0.19633124768733978, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.031800294667482375, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00213345093652606, "signal/frontier_coverage_1/centered_abs_mean": 0.14919237792491913, "signal/frontier_coverage_1/group_std_mean": 0.19633124768733978, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.031800294667482375, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00213345093652606, "signal/frontier_coverage_10/centered_abs_mean": 0.0596095934510231, "signal/frontier_coverage_10/group_std_mean": 0.07624187171459199, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012707811035215854, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008524171658791602, "signal/frontier_coverage_15/centered_abs_mean": 0.08215909749269486, "signal/frontier_coverage_15/group_std_mean": 0.10303394496440887, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01755863316357136, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011748750694096088, "signal/frontier_coverage_20/centered_abs_mean": 0.11732317209243774, "signal/frontier_coverage_20/group_std_mean": 0.1485839903354645, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02507934905588627, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016777212964370846, "signal/frontier_coverage_25/centered_abs_mean": 0.1591496855020523, "signal/frontier_coverage_25/group_std_mean": 0.20272190868854523, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03401615396142006, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002275840612128377, "signal/frontier_coverage_5/centered_abs_mean": 0.14844318926334382, "signal/frontier_coverage_5/group_std_mean": 0.1953786164522171, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031640862300992015, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002122737606987357, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34229235649108886, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4075429916381836, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5113430559635163, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03422923907637596, "step": 495 }, { "calibration/aurc": 0.1747839233157916, "calibration/batch_distribution_entropy": 0.9480725878663744, "calibration/buffer_distribution_entropy": 0.9787909719533993, "calibration/confidence_entropy": 0.48127999860544374, "calibration/coverage@0%": 0.14181121071084904, "calibration/coverage@1%": 0.1857661207341445, "calibration/coverage@10%": 0.40346547235726893, "calibration/coverage@15%": 0.45549036350187266, "calibration/coverage@20%": 0.6920285357135578, "calibration/coverage@25%": 0.7202605792747888, "calibration/coverage@30%": 0.7576995831311144, "calibration/coverage@5%": 0.3586746769897794, "calibration/ece": 0.18344351358857308, "calibration/mean_confidence": 0.580451451919918, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.025434027777777767, "completions/max_length": 4065.8, "completions/max_terminated_length": 4065.8, "completions/mean_length": 929.4675415039062, "completions/mean_terminated_length": 953.9163452148438, "completions/min_length": 0.0, "completions/min_terminated_length": 185.4, "epoch": 1.2015974800314997, "grad_norm": 0.00263255275785923, "learning_rate": 3.245192307692308e-06, "loss": -0.0572, "num_tokens": 1292045410.0, "reward": 0.9915547251701355, "reward_std": 0.12933797985315323, "rewards/accuracy_reward": 0.7018229246139527, "rewards/brier_reward": 0.8028098583221436, "rewards/confidence_uniqueness_reward": 0.9266672849655151, "rewards/format_reward": 0.9745659708976746, "rewards/frontier_coverage_0": 0.02632404714822769, "rewards/frontier_coverage_1": 0.02632404714822769, "rewards/frontier_coverage_10": 0.04794232621788978, "rewards/frontier_coverage_15": 0.09831590056419373, "rewards/frontier_coverage_20": 0.1676923632621765, "rewards/frontier_coverage_25": 0.2507633984088898, "rewards/frontier_coverage_5": 0.026532990764826535, "rewards/frontier_entropy_batch_reward": -0.28795150518417356, "signal/accuracy_reward/centered_abs_mean": 0.12767469733953477, "signal/accuracy_reward/group_std_mean": 0.16988992393016816, "signal/accuracy_reward/group_zero_std_frac": 0.5111111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.93791184425354, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06383734866976738, "signal/advantage_abs_mean": 0.7479843735694885, "signal/advantage_pre_scale_abs_mean": 0.09546350091695785, "signal/advantage_pre_scale_std": 0.16411724388599397, "signal/advantage_std": 0.9830235481262207, "signal/brier_reward/centered_abs_mean": 0.13326035737991332, "signal/brier_reward/group_std_mean": 0.1708984136581421, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19498080313205718, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013326035998761653, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04590501487255096, "signal/confidence_uniqueness_reward/group_std_mean": 0.0698548398911953, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06717531010508537, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045905016828328375, "signal/format_reward/centered_abs_mean": 0.03612738735973835, "signal/format_reward/group_std_mean": 0.058421958982944486, "signal/format_reward/group_zero_std_frac": 0.7861111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.26427164673805237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.018063693679869174, "signal/frontier_coverage_0/centered_abs_mean": 0.18238037824630737, "signal/frontier_coverage_0/group_std_mean": 0.23497777283191681, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03820807188749313, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002608039416372776, "signal/frontier_coverage_1/centered_abs_mean": 0.18238037824630737, "signal/frontier_coverage_1/group_std_mean": 0.23497777283191681, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03820807188749313, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002608039416372776, "signal/frontier_coverage_10/centered_abs_mean": 0.06780672222375869, "signal/frontier_coverage_10/group_std_mean": 0.08613030165433884, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01419361848384142, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009696360910311341, "signal/frontier_coverage_15/centered_abs_mean": 0.07367192506790161, "signal/frontier_coverage_15/group_std_mean": 0.09180823713541031, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015434963069856167, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010535084875300527, "signal/frontier_coverage_20/centered_abs_mean": 0.1000160589814186, "signal/frontier_coverage_20/group_std_mean": 0.12563731968402864, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020969900116324425, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001430229633115232, "signal/frontier_coverage_25/centered_abs_mean": 0.1348770409822464, "signal/frontier_coverage_25/group_std_mean": 0.17066603899002075, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028289969265460967, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019287416245788336, "signal/frontier_coverage_5/centered_abs_mean": 0.18139650523662568, "signal/frontier_coverage_5/group_std_mean": 0.23375505208969116, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03800202459096909, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00259396992623806, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3278991162776947, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3979050636291504, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.479867422580719, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032789912074804306, "step": 500 }, { "epoch": 1.2015974800314997, "eval_calibration/aurc": 0.11063293442041942, "eval_calibration/batch_distribution_entropy": 0.9226941120344988, "eval_calibration/buffer_distribution_entropy": 0.9794678494682892, "eval_calibration/confidence_entropy": 0.48473487947917687, "eval_calibration/coverage@0%": 0.2981406810035842, "eval_calibration/coverage@1%": 0.2981406810035842, "eval_calibration/coverage@10%": 0.6006496415770609, "eval_calibration/coverage@15%": 0.783826164874552, "eval_calibration/coverage@20%": 0.8810035842293907, "eval_calibration/coverage@25%": 0.9301075268817205, "eval_calibration/coverage@30%": 0.989247311827957, "eval_calibration/coverage@5%": 0.4925739247311827, "eval_calibration/ece": 0.24244848342293904, "eval_calibration/mean_confidence": 0.5366845362903225, "eval_completions/clipped_ratio": 0.02777777777777779, "eval_completions/max_length": 3848.8333333333335, "eval_completions/max_terminated_length": 3848.8333333333335, "eval_completions/mean_length": 937.8027954101562, "eval_completions/mean_terminated_length": 964.6242472330729, "eval_completions/min_length": 44.0, "eval_completions/min_terminated_length": 174.5, "eval_loss": 0.0, "eval_num_tokens": 1292045410.0, "eval_reward": 0.8992621103922526, "eval_reward_std": 0.27247366060813266, "eval_rewards/accuracy_reward": 0.6744791567325592, "eval_rewards/brier_reward": 0.7963380813598633, "eval_rewards/confidence_uniqueness_reward": 0.8638127446174622, "eval_rewards/format_reward": 0.9678819477558136, "eval_rewards/frontier_coverage_0": 0.03403743077069521, "eval_rewards/frontier_coverage_1": 0.03403743077069521, "eval_rewards/frontier_coverage_10": 0.04340067381660143, "eval_rewards/frontier_coverage_15": 0.08919741213321686, "eval_rewards/frontier_coverage_20": 0.15402339398860931, "eval_rewards/frontier_coverage_25": 0.23039834946393967, "eval_rewards/frontier_coverage_5": 0.0341131171832482, "eval_rewards/frontier_entropy_batch_reward": -0.9678819477558136, "eval_runtime": 226.0063, "eval_samples_per_second": 4.425, "eval_signal/accuracy_reward/centered_abs_mean": 0.4259440153837204, "eval_signal/accuracy_reward/group_std_mean": 0.46788185834884644, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7978424926598867, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2129720076918602, "eval_signal/advantage_abs_mean": 0.8432898223400116, "eval_signal/advantage_pre_scale_abs_mean": 0.2298309033115705, "eval_signal/advantage_pre_scale_std": 0.2716887692610423, "eval_signal/advantage_std": 0.9864533146222433, "eval_signal/brier_reward/centered_abs_mean": 0.1994489332040151, "eval_signal/brier_reward/group_std_mean": 0.26833559075991315, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07479969660441081, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019944893817106884, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07945458094278972, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.14848080774148306, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029809714915851753, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007945458016668757, "eval_signal/format_reward/centered_abs_mean": 0.060601128886143364, "eval_signal/format_reward/group_std_mean": 0.1444742592672507, "eval_signal/format_reward/group_zero_std_frac": 0.3055555621782939, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.1130801538626353, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.030300564443071682, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2900494833787282, "eval_signal/frontier_coverage_0/group_std_mean": 0.402191494901975, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01558228504533569, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004147707639882962, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2900494833787282, "eval_signal/frontier_coverage_1/group_std_mean": 0.402191494901975, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01558228504533569, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004147707639882962, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.09187478696306546, "eval_signal/frontier_coverage_10/group_std_mean": 0.12893202776710191, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004941360326483846, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001313809499454995, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.10598516836762428, "eval_signal/frontier_coverage_15/group_std_mean": 0.13580323879917464, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.005688676067317526, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015155878500081599, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.1837949976325035, "eval_signal/frontier_coverage_20/group_std_mean": 0.22733782976865768, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.009865260062118372, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026282683635751405, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.27196736137072247, "eval_signal/frontier_coverage_25/group_std_mean": 0.3316345016161601, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.014592031327386698, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003889133183596035, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2886228909095128, "eval_signal/frontier_coverage_5/group_std_mean": 0.4004148344198863, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015506657616545757, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.004127307174106439, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.060601128886143364, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.1444742592672507, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3055555621782939, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.022616030648350716, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.006060113121444981, "eval_steps_per_second": 0.027, "step": 500 }, { "epoch": 1.2015974800314997, "step": 500, "train_probe_calibration/aurc": 0.10082706938955065, "train_probe_calibration/batch_distribution_entropy": 0.9239375688015242, "train_probe_calibration/buffer_distribution_entropy": 0.9797398341398745, "train_probe_calibration/confidence_entropy": 0.5040916248291127, "train_probe_calibration/coverage@0%": 0.3246373285131628, "train_probe_calibration/coverage@1%": 0.3246373285131628, "train_probe_calibration/coverage@10%": 0.5615556637004079, "train_probe_calibration/coverage@15%": 0.7940188172043011, "train_probe_calibration/coverage@20%": 0.9206989247311829, "train_probe_calibration/coverage@25%": 0.9627016129032259, "train_probe_calibration/coverage@30%": 0.9946236559139785, "train_probe_calibration/coverage@5%": 0.43221635150166854, "train_probe_calibration/ece": 0.23516302836484984, "train_probe_calibration/mean_confidence": 0.5604287286800148, "train_probe_completions/clipped_ratio": 0.025868055555555564, "train_probe_completions/max_length": 3734.5, "train_probe_completions/max_terminated_length": 3734.5, "train_probe_completions/mean_length": 925.494639078776, "train_probe_completions/mean_terminated_length": 950.2415771484375, "train_probe_completions/min_length": 0.0, "train_probe_completions/min_terminated_length": 142.5, "train_probe_loss": 0.0, "train_probe_num_tokens": 1292045410.0, "train_probe_reward": 0.9318026403586069, "train_probe_reward_std": 0.2528877506653468, "train_probe_rewards/accuracy_reward": 0.7326388855775198, "train_probe_rewards/brier_reward": 0.8016867140928904, "train_probe_rewards/confidence_uniqueness_reward": 0.875600536664327, "train_probe_rewards/format_reward": 0.9739583333333334, "train_probe_rewards/frontier_coverage_0": -0.0028847836268444857, "train_probe_rewards/frontier_coverage_1": -0.0028847836268444857, "train_probe_rewards/frontier_coverage_10": 0.038503200436631836, "train_probe_rewards/frontier_coverage_15": 0.10058233390251796, "train_probe_rewards/frontier_coverage_20": 0.1763912762204806, "train_probe_rewards/frontier_coverage_25": 0.2643541420499484, "train_probe_rewards/frontier_coverage_5": -0.0026549692265689373, "train_probe_rewards/frontier_entropy_batch_reward": -0.9739583333333334, "train_probe_runtime": 213.9075, "train_probe_samples_per_second": 4.675, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3841145783662796, "train_probe_signal/accuracy_reward/group_std_mean": 0.44358054796854657, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.785220742225647, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.1920572891831398, "train_probe_signal/advantage_abs_mean": 0.806507021188736, "train_probe_signal/advantage_pre_scale_abs_mean": 0.20440822591384253, "train_probe_signal/advantage_pre_scale_std": 0.25263623893260956, "train_probe_signal/advantage_std": 0.9864190816879272, "train_probe_signal/brier_reward/centered_abs_mean": 0.1916190137465795, "train_probe_signal/brier_reward/group_std_mean": 0.25596588601668674, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.078202273696661, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.019161902368068695, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07009036901096503, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.1314548502365748, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028176602286597092, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007009036994228761, "train_probe_signal/format_reward/centered_abs_mean": 0.04937065920482079, "train_probe_signal/format_reward/group_std_mean": 0.12234597342709701, "train_probe_signal/format_reward/group_zero_std_frac": 0.3888888979951541, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.09680349566042423, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.024685329602410395, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2796660164992015, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.40178043643633526, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01646174117922783, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003999224087844293, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2796660164992015, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.40178043643633526, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01646174117922783, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003999224087844293, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.09035198017954826, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.12981040154894194, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005333998861412208, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012920333344178896, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1053730125228564, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.13237932324409485, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006203630783905585, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015068341551038127, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.18111580361922583, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.21775591125090918, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.010658677046497663, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0025899558483312526, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2640492667754491, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.31466857592264813, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.015524488873779774, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003775904420763254, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.27851397295792896, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.40030378103256226, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016393487496922415, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003982749573575954, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.04937065920482079, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.12234597342709701, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3888888979951541, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019360700622200966, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.004937066075702508, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.20048948593563543, "calibration/batch_distribution_entropy": 0.9468109675307013, "calibration/buffer_distribution_entropy": 0.9805443145825734, "calibration/confidence_entropy": 0.48701692744443614, "calibration/coverage@0%": 0.02852633425186054, "calibration/coverage@1%": 0.09942580515133145, "calibration/coverage@10%": 0.2110494824000087, "calibration/coverage@15%": 0.3519585394662962, "calibration/coverage@20%": 0.48718463966566417, "calibration/coverage@25%": 0.7482349120999991, "calibration/coverage@30%": 0.8652923686374748, "calibration/coverage@5%": 0.2004840062095325, "calibration/ece": 0.15429015886639375, "calibration/mean_confidence": 0.5836335671726103, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.022395833333333327, "completions/max_length": 4073.4, "completions/max_terminated_length": 4073.4, "completions/mean_length": 924.2508056640625, "completions/mean_terminated_length": 946.0132934570313, "completions/min_length": 0.0, "completions/min_terminated_length": 138.4, "epoch": 1.2135973300333747, "grad_norm": 0.0023646834306418896, "learning_rate": 3.215144230769231e-06, "loss": -0.0669, "num_tokens": 1305812971.0, "reward": 0.9820307612419128, "reward_std": 0.13412527740001678, "rewards/accuracy_reward": 0.6799479246139526, "rewards/brier_reward": 0.8102263927459716, "rewards/confidence_uniqueness_reward": 0.9278807520866394, "rewards/format_reward": 0.97734375, "rewards/frontier_coverage_0": 0.03650612365454435, "rewards/frontier_coverage_1": 0.03650612365454435, "rewards/frontier_coverage_10": 0.04657657854259014, "rewards/frontier_coverage_15": 0.09520111978054047, "rewards/frontier_coverage_20": 0.16185519099235535, "rewards/frontier_coverage_25": 0.23959860801696778, "rewards/frontier_coverage_5": 0.03667759094387293, "rewards/frontier_entropy_batch_reward": -0.2976256161928177, "signal/accuracy_reward/centered_abs_mean": 0.13191731721162797, "signal/accuracy_reward/group_std_mean": 0.17187346816062926, "signal/accuracy_reward/group_zero_std_frac": 0.5166666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9841681718826294, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06595865860581399, "signal/advantage_abs_mean": 0.7553405404090882, "signal/advantage_pre_scale_abs_mean": 0.09927740842103958, "signal/advantage_pre_scale_std": 0.17140043079853057, "signal/advantage_std": 0.9830001354217529, "signal/brier_reward/centered_abs_mean": 0.12971344888210296, "signal/brier_reward/group_std_mean": 0.16749563217163085, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19204829931259154, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012971345335245132, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.046678535640239716, "signal/confidence_uniqueness_reward/group_std_mean": 0.0732833631336689, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06865589916706086, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004667853750288486, "signal/format_reward/centered_abs_mean": 0.03662651926279068, "signal/format_reward/group_std_mean": 0.061449573189020154, "signal/format_reward/group_zero_std_frac": 0.7722222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.26796387135982513, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01831325963139534, "signal/frontier_coverage_0/centered_abs_mean": 0.16079207360744477, "signal/frontier_coverage_0/group_std_mean": 0.2081853598356247, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034043775871396065, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022993266582489015, "signal/frontier_coverage_1/centered_abs_mean": 0.16079207360744477, "signal/frontier_coverage_1/group_std_mean": 0.2081853598356247, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034043775871396065, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022993266582489015, "signal/frontier_coverage_10/centered_abs_mean": 0.062105555832386014, "signal/frontier_coverage_10/group_std_mean": 0.07952445596456528, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013151372782886028, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008881094516254961, "signal/frontier_coverage_15/centered_abs_mean": 0.07533517330884934, "signal/frontier_coverage_15/group_std_mean": 0.09395917057991028, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016060548834502697, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001077292929403484, "signal/frontier_coverage_20/centered_abs_mean": 0.10723637640476227, "signal/frontier_coverage_20/group_std_mean": 0.1344798132777214, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022898206114768983, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001533480198122561, "signal/frontier_coverage_25/centered_abs_mean": 0.14641993939876558, "signal/frontier_coverage_25/group_std_mean": 0.1840170592069626, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03127242475748062, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020938052562996744, "signal/frontier_coverage_5/centered_abs_mean": 0.16002678871154785, "signal/frontier_coverage_5/group_std_mean": 0.20722314417362214, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03388084582984448, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022883829893544315, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3222051739692688, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3910215377807617, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4791221499443054, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03222051709890365, "step": 505 }, { "calibration/aurc": 0.11524218938071602, "calibration/batch_distribution_entropy": 0.9543161540977898, "calibration/buffer_distribution_entropy": 0.9807149695576822, "calibration/confidence_entropy": 0.48301446353835925, "calibration/coverage@0%": 0.07332392905046345, "calibration/coverage@1%": 0.15872933445586884, "calibration/coverage@10%": 0.5471898087890095, "calibration/coverage@15%": 0.6684821027885148, "calibration/coverage@20%": 0.8312767838615454, "calibration/coverage@25%": 0.9212252117614753, "calibration/coverage@30%": 0.9618798955613578, "calibration/coverage@5%": 0.32275472534539945, "calibration/ece": 0.14129609700485238, "calibration/mean_confidence": 0.6127158754894365, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.027083333333333348, "completions/max_length": 4070.2, "completions/max_terminated_length": 4070.2, "completions/mean_length": 886.969970703125, "completions/mean_terminated_length": 911.6876220703125, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 1.2255971800352496, "grad_norm": 0.0024071610532701015, "learning_rate": 3.185096153846154e-06, "loss": -0.0754, "num_tokens": 1319165553.0, "reward": 0.9764072179794312, "reward_std": 0.14252603948116302, "rewards/accuracy_reward": 0.6737847089767456, "rewards/brier_reward": 0.811568808555603, "rewards/confidence_uniqueness_reward": 0.9235637068748475, "rewards/format_reward": 0.9729166626930237, "rewards/frontier_coverage_0": 0.03956596069037914, "rewards/frontier_coverage_1": 0.03956596069037914, "rewards/frontier_coverage_10": 0.045381075143814086, "rewards/frontier_coverage_15": 0.09828296452760696, "rewards/frontier_coverage_20": 0.16703141033649443, "rewards/frontier_coverage_25": 0.24602045118808746, "rewards/frontier_coverage_5": 0.03960155472159386, "rewards/frontier_entropy_batch_reward": -0.301156747341156, "signal/accuracy_reward/centered_abs_mean": 0.1375868022441864, "signal/accuracy_reward/group_std_mean": 0.18322362005710602, "signal/accuracy_reward/group_zero_std_frac": 0.4750000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9304415464401246, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0687934011220932, "signal/advantage_abs_mean": 0.7482947945594788, "signal/advantage_pre_scale_abs_mean": 0.10517836511135101, "signal/advantage_pre_scale_std": 0.17666726410388947, "signal/advantage_std": 0.9831276297569275, "signal/brier_reward/centered_abs_mean": 0.13354314863681793, "signal/brier_reward/group_std_mean": 0.17319420278072356, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18088602125644684, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013354315236210824, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04955080598592758, "signal/confidence_uniqueness_reward/group_std_mean": 0.07686270922422409, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06728862300515175, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004955080850049853, "signal/format_reward/centered_abs_mean": 0.03995225727558136, "signal/format_reward/group_std_mean": 0.06567521169781684, "signal/format_reward/group_zero_std_frac": 0.7583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2711753636598587, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01997612863779068, "signal/frontier_coverage_0/centered_abs_mean": 0.15956219732761384, "signal/frontier_coverage_0/group_std_mean": 0.2080337166786194, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03082931824028492, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002281739329919219, "signal/frontier_coverage_1/centered_abs_mean": 0.15956219732761384, "signal/frontier_coverage_1/group_std_mean": 0.2080337166786194, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03082931824028492, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002281739329919219, "signal/frontier_coverage_10/centered_abs_mean": 0.062475910782814024, "signal/frontier_coverage_10/group_std_mean": 0.08098638206720352, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012097678333520889, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008934055338613689, "signal/frontier_coverage_15/centered_abs_mean": 0.0785724624991417, "signal/frontier_coverage_15/group_std_mean": 0.09894902110099793, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015267135202884674, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011235862039029598, "signal/frontier_coverage_20/centered_abs_mean": 0.11322257518768311, "signal/frontier_coverage_20/group_std_mean": 0.14310256838798524, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021999914199113846, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001619082852266729, "signal/frontier_coverage_25/centered_abs_mean": 0.15498829185962676, "signal/frontier_coverage_25/group_std_mean": 0.19647544622421265, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030105485394597054, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022163325920701027, "signal/frontier_coverage_5/centered_abs_mean": 0.15889337360858918, "signal/frontier_coverage_5/group_std_mean": 0.20719724297523498, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030700084939599036, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022721752058714626, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3251783013343811, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39037303924560546, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44218236207962036, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03251783214509487, "step": 510 }, { "calibration/aurc": 0.14329086717155515, "calibration/batch_distribution_entropy": 0.9839842077691205, "calibration/buffer_distribution_entropy": 0.9813149779353205, "calibration/confidence_entropy": 0.4870171331461034, "calibration/coverage@0%": 0.14524750649642462, "calibration/coverage@1%": 0.20226281740410096, "calibration/coverage@10%": 0.5290204554078171, "calibration/coverage@15%": 0.6161437388665884, "calibration/coverage@20%": 0.6710200053905481, "calibration/coverage@25%": 0.732897098273093, "calibration/coverage@30%": 0.9145777773032158, "calibration/coverage@5%": 0.3410370616474247, "calibration/ece": 0.18543605582400366, "calibration/mean_confidence": 0.5287664657151198, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.038107638888888885, "completions/max_length": 4064.6, "completions/max_terminated_length": 4064.6, "completions/mean_length": 964.12431640625, "completions/mean_terminated_length": 1002.4943969726562, "completions/min_length": 0.0, "completions/min_terminated_length": 136.0, "epoch": 1.2375970300371246, "grad_norm": 0.0022482494823634624, "learning_rate": 3.1550480769230772e-06, "loss": -0.1034, "num_tokens": 1333396905.0, "reward": 0.9720722794532776, "reward_std": 0.15557878315448762, "rewards/accuracy_reward": 0.6752604126930237, "rewards/brier_reward": 0.7917217254638672, "rewards/confidence_uniqueness_reward": 0.9149444341659546, "rewards/format_reward": 0.9618923664093018, "rewards/frontier_coverage_0": 0.03308947309851647, "rewards/frontier_coverage_1": 0.03308947309851647, "rewards/frontier_coverage_10": 0.047409339994192126, "rewards/frontier_coverage_15": 0.09314282685518264, "rewards/frontier_coverage_20": 0.15709015727043152, "rewards/frontier_coverage_25": 0.2324953556060791, "rewards/frontier_coverage_5": 0.03309837207198143, "rewards/frontier_entropy_batch_reward": -0.2617133766412735, "signal/accuracy_reward/centered_abs_mean": 0.15260959267616273, "signal/accuracy_reward/group_std_mean": 0.19807665944099426, "signal/accuracy_reward/group_zero_std_frac": 0.44722222685813906, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9974145650863647, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07630479633808136, "signal/advantage_abs_mean": 0.7443260788917542, "signal/advantage_pre_scale_abs_mean": 0.11457104533910752, "signal/advantage_pre_scale_std": 0.1953383594751358, "signal/advantage_std": 0.9831731081008911, "signal/brier_reward/centered_abs_mean": 0.1467885345220566, "signal/brier_reward/group_std_mean": 0.18838548064231872, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19225478768348694, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014678853936493397, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06417426541447639, "signal/confidence_uniqueness_reward/group_std_mean": 0.09944256693124771, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08404597043991088, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006417426653206349, "signal/format_reward/centered_abs_mean": 0.05706922709941864, "signal/format_reward/group_std_mean": 0.09146839380264282, "signal/format_reward/group_zero_std_frac": 0.675000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.37363603711128235, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02853461354970932, "signal/frontier_coverage_0/centered_abs_mean": 0.1946762889623642, "signal/frontier_coverage_0/group_std_mean": 0.25131337344646454, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036392098665237425, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002783870976418257, "signal/frontier_coverage_1/centered_abs_mean": 0.1946762889623642, "signal/frontier_coverage_1/group_std_mean": 0.25131337344646454, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036392098665237425, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002783870976418257, "signal/frontier_coverage_10/centered_abs_mean": 0.07278724461793899, "signal/frontier_coverage_10/group_std_mean": 0.09403313398361206, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013623019121587277, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010408576345071197, "signal/frontier_coverage_15/centered_abs_mean": 0.07076575458049775, "signal/frontier_coverage_15/group_std_mean": 0.08887670189142227, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.013323003239929677, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010119502898305655, "signal/frontier_coverage_20/centered_abs_mean": 0.09599952101707458, "signal/frontier_coverage_20/group_std_mean": 0.12031063288450242, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01808130946010351, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001372793153859675, "signal/frontier_coverage_25/centered_abs_mean": 0.13066532760858535, "signal/frontier_coverage_25/group_std_mean": 0.1640017569065094, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0245991725474596, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018685141811147332, "signal/frontier_coverage_5/centered_abs_mean": 0.19397080540657044, "signal/frontier_coverage_5/group_std_mean": 0.25044049620628356, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0362599141895771, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0027737823780626058, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31684728264808654, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3862759530544281, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4152994632720947, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03168472871184349, "step": 515 }, { "calibration/aurc": 0.10522250668379389, "calibration/batch_distribution_entropy": 0.9442073604560515, "calibration/buffer_distribution_entropy": 0.9815735705294294, "calibration/confidence_entropy": 0.46795378636960266, "calibration/coverage@0%": 0.21857111712669325, "calibration/coverage@1%": 0.22694808047747855, "calibration/coverage@10%": 0.621711658227854, "calibration/coverage@15%": 0.7427390973094512, "calibration/coverage@20%": 0.8060069914470027, "calibration/coverage@25%": 0.8796992448478779, "calibration/coverage@30%": 0.943609360040151, "calibration/coverage@5%": 0.3731836924541433, "calibration/ece": 0.155591407185311, "calibration/mean_confidence": 0.5833382217229488, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02178819444444442, "completions/max_length": 4046.4, "completions/max_terminated_length": 4046.4, "completions/mean_length": 969.8170288085937, "completions/mean_terminated_length": 991.4690673828125, "completions/min_length": 0.0, "completions/min_terminated_length": 134.8, "epoch": 1.2495968800389996, "grad_norm": 0.002296426799148321, "learning_rate": 3.125e-06, "loss": -0.06, "num_tokens": 1347645709.0, "reward": 0.999582850933075, "reward_std": 0.13867679089307786, "rewards/accuracy_reward": 0.7163194417953491, "rewards/brier_reward": 0.8157782912254333, "rewards/confidence_uniqueness_reward": 0.9268351197242737, "rewards/format_reward": 0.9780381917953491, "rewards/frontier_coverage_0": 0.01865054778754711, "rewards/frontier_coverage_1": 0.01865054778754711, "rewards/frontier_coverage_10": 0.04594656229019165, "rewards/frontier_coverage_15": 0.10937698483467102, "rewards/frontier_coverage_20": 0.18607729077339172, "rewards/frontier_coverage_25": 0.2728832870721817, "rewards/frontier_coverage_5": 0.018807118758559227, "rewards/frontier_entropy_batch_reward": -0.31443960666656495, "signal/accuracy_reward/centered_abs_mean": 0.13279080092906953, "signal/accuracy_reward/group_std_mean": 0.17991018891334534, "signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9323319673538208, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06639540046453477, "signal/advantage_abs_mean": 0.7279812693595886, "signal/advantage_pre_scale_abs_mean": 0.09768068045377731, "signal/advantage_pre_scale_std": 0.17483056783676149, "signal/advantage_std": 0.983068585395813, "signal/brier_reward/centered_abs_mean": 0.13028870820999144, "signal/brier_reward/group_std_mean": 0.17139129638671874, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18324156403541564, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013028871826827526, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.048380535840988156, "signal/confidence_uniqueness_reward/group_std_mean": 0.08181221485137939, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06806915402412414, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0048380537889897825, "signal/format_reward/centered_abs_mean": 0.03767903596162796, "signal/format_reward/group_std_mean": 0.06963710114359856, "signal/format_reward/group_zero_std_frac": 0.7194444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2640444874763489, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01883951798081398, "signal/frontier_coverage_0/centered_abs_mean": 0.16820046305656433, "signal/frontier_coverage_0/group_std_mean": 0.21814047396183014, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03390519693493843, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002405266650021076, "signal/frontier_coverage_1/centered_abs_mean": 0.16820046305656433, "signal/frontier_coverage_1/group_std_mean": 0.21814047396183014, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03390519693493843, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002405266650021076, "signal/frontier_coverage_10/centered_abs_mean": 0.06460350453853607, "signal/frontier_coverage_10/group_std_mean": 0.08274413645267487, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013064522296190262, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009238301077857614, "signal/frontier_coverage_15/centered_abs_mean": 0.07587840259075165, "signal/frontier_coverage_15/group_std_mean": 0.09482774585485458, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015400785207748412, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010850611375644802, "signal/frontier_coverage_20/centered_abs_mean": 0.10519644320011139, "signal/frontier_coverage_20/group_std_mean": 0.13207355737686158, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021333112940192224, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015043091727420688, "signal/frontier_coverage_25/centered_abs_mean": 0.14107316136360168, "signal/frontier_coverage_25/group_std_mean": 0.17815456092357634, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028566186130046845, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002017346234060824, "signal/frontier_coverage_5/centered_abs_mean": 0.1673600971698761, "signal/frontier_coverage_5/group_std_mean": 0.21709263622760772, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03373638391494751, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023932492826133967, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32988876700401304, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3970253348350525, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46771731972694397, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03298887610435486, "step": 520 }, { "calibration/aurc": 0.07573435525317126, "calibration/batch_distribution_entropy": 0.9798587675052751, "calibration/buffer_distribution_entropy": 0.9805877942073135, "calibration/confidence_entropy": 0.48889949932619253, "calibration/coverage@0%": 0.1964918246394519, "calibration/coverage@1%": 0.31187742499497545, "calibration/coverage@10%": 0.727026166554092, "calibration/coverage@15%": 0.8365429401666103, "calibration/coverage@20%": 0.9077380952380952, "calibration/coverage@25%": 0.9482638888888889, "calibration/coverage@30%": 0.9755208333333334, "calibration/coverage@5%": 0.5086287200446998, "calibration/ece": 0.21796827993122228, "calibration/mean_confidence": 0.554094432213387, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015624999999999977, "completions/max_length": 4022.0, "completions/max_terminated_length": 4022.0, "completions/mean_length": 975.5559936523438, "completions/mean_terminated_length": 991.1448974609375, "completions/min_length": 0.0, "completions/min_terminated_length": 128.4, "epoch": 1.2615967300408744, "grad_norm": 0.002525532152503729, "learning_rate": 3.094951923076923e-06, "loss": -0.0445, "num_tokens": 1362017970.0, "reward": 1.0022146463394166, "reward_std": 0.13520086407661439, "rewards/accuracy_reward": 0.7090277671813965, "rewards/brier_reward": 0.8121278762817383, "rewards/confidence_uniqueness_reward": 0.9360453128814697, "rewards/format_reward": 0.9843749880790711, "rewards/frontier_coverage_0": 0.013298888225108385, "rewards/frontier_coverage_1": 0.013298888225108385, "rewards/frontier_coverage_10": 0.04437965005636215, "rewards/frontier_coverage_15": 0.09919513911008834, "rewards/frontier_coverage_20": 0.16938573122024536, "rewards/frontier_coverage_25": 0.2509212583303452, "rewards/frontier_coverage_5": 0.013450206723064184, "rewards/frontier_entropy_batch_reward": -0.27940293252468107, "signal/accuracy_reward/centered_abs_mean": 0.14516059160232545, "signal/accuracy_reward/group_std_mean": 0.1927432417869568, "signal/accuracy_reward/group_zero_std_frac": 0.44722222685813906, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.031061041355133, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07258029580116272, "signal/advantage_abs_mean": 0.7332652449607849, "signal/advantage_pre_scale_abs_mean": 0.09713614881038665, "signal/advantage_pre_scale_std": 0.1668863743543625, "signal/advantage_std": 0.9830695152282715, "signal/brier_reward/centered_abs_mean": 0.12466190755367279, "signal/brier_reward/group_std_mean": 0.16394980251789093, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.176994127035141, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012466190941631794, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.036994371190667154, "signal/confidence_uniqueness_reward/group_std_mean": 0.06596145778894424, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05264209508895874, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003699437016621232, "signal/format_reward/centered_abs_mean": 0.02797309048473835, "signal/format_reward/group_std_mean": 0.05659161433577538, "signal/format_reward/group_zero_std_frac": 0.7555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.19929880797863006, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013986545242369175, "signal/frontier_coverage_0/centered_abs_mean": 0.176213401556015, "signal/frontier_coverage_0/group_std_mean": 0.23016616702079773, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03586722575128078, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025198515504598618, "signal/frontier_coverage_1/centered_abs_mean": 0.176213401556015, "signal/frontier_coverage_1/group_std_mean": 0.23016616702079773, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03586722575128078, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025198515504598618, "signal/frontier_coverage_10/centered_abs_mean": 0.0657036691904068, "signal/frontier_coverage_10/group_std_mean": 0.08542147278785706, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013377317041158677, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009395624394528568, "signal/frontier_coverage_15/centered_abs_mean": 0.07424005419015885, "signal/frontier_coverage_15/group_std_mean": 0.0930503711104393, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015023346804082394, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010616328101605177, "signal/frontier_coverage_20/centered_abs_mean": 0.10434054583311081, "signal/frontier_coverage_20/group_std_mean": 0.13108078241348267, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021078139171004297, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014920698245987295, "signal/frontier_coverage_25/centered_abs_mean": 0.14281499981880189, "signal/frontier_coverage_25/group_std_mean": 0.17953548729419708, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028848520666360854, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002042254386469722, "signal/frontier_coverage_5/centered_abs_mean": 0.17538869976997376, "signal/frontier_coverage_5/group_std_mean": 0.22913878560066223, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035702398791909215, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025080583058297635, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32218562960624697, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3871644794940948, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45660458207130433, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032218563556671145, "step": 525 }, { "calibration/aurc": 0.12962692277572296, "calibration/batch_distribution_entropy": 0.9677968027317269, "calibration/buffer_distribution_entropy": 0.9805717030444366, "calibration/confidence_entropy": 0.48113403159133633, "calibration/coverage@0%": 0.18798672099690084, "calibration/coverage@1%": 0.2269086786688585, "calibration/coverage@10%": 0.6417013312562826, "calibration/coverage@15%": 0.7091990515665254, "calibration/coverage@20%": 0.751765976896867, "calibration/coverage@25%": 0.7722513089005235, "calibration/coverage@30%": 0.8313000360120778, "calibration/coverage@5%": 0.44918795254640304, "calibration/ece": 0.19004887245585186, "calibration/mean_confidence": 0.5680402399944228, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013628472222222233, "completions/max_length": 3961.8, "completions/max_terminated_length": 3961.8, "completions/mean_length": 875.2192016601563, "completions/mean_terminated_length": 887.3105590820312, "completions/min_length": 0.0, "completions/min_terminated_length": 137.4, "epoch": 1.2735965800427493, "grad_norm": 0.0029400011990219355, "learning_rate": 3.0649038461538464e-06, "loss": -0.0226, "num_tokens": 1375179855.0, "reward": 1.0003417372703551, "reward_std": 0.13130183517932892, "rewards/accuracy_reward": 0.703125, "rewards/brier_reward": 0.8222099423408509, "rewards/confidence_uniqueness_reward": 0.936951196193695, "rewards/format_reward": 0.9862847328186035, "rewards/frontier_coverage_0": 0.03191882474347949, "rewards/frontier_coverage_1": 0.03191882474347949, "rewards/frontier_coverage_10": 0.04538390077650547, "rewards/frontier_coverage_15": 0.10414295643568039, "rewards/frontier_coverage_20": 0.17619226574897767, "rewards/frontier_coverage_25": 0.25885328352451326, "rewards/frontier_coverage_5": 0.03208579635247588, "rewards/frontier_entropy_batch_reward": -0.3001033067703247, "signal/accuracy_reward/centered_abs_mean": 0.15182291865348815, "signal/accuracy_reward/group_std_mean": 0.19853868186473847, "signal/accuracy_reward/group_zero_std_frac": 0.4388888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0715280532836915, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07591145932674408, "signal/advantage_abs_mean": 0.7480924487113952, "signal/advantage_pre_scale_abs_mean": 0.0976143628358841, "signal/advantage_pre_scale_std": 0.16226148903369902, "signal/advantage_std": 0.9830726742744446, "signal/brier_reward/centered_abs_mean": 0.12366195023059845, "signal/brier_reward/group_std_mean": 0.16041145622730255, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17475055456161498, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012366195768117904, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.033436324819922446, "signal/confidence_uniqueness_reward/group_std_mean": 0.05639224275946617, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.047277077287435534, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003343632631003857, "signal/format_reward/centered_abs_mean": 0.02293836809694767, "signal/format_reward/group_std_mean": 0.04403809979557991, "signal/format_reward/group_zero_std_frac": 0.8138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.16185255199670792, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011469184048473835, "signal/frontier_coverage_0/centered_abs_mean": 0.17727761566638947, "signal/frontier_coverage_0/group_std_mean": 0.2270788460969925, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03581186383962631, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025350698735564945, "signal/frontier_coverage_1/centered_abs_mean": 0.17727761566638947, "signal/frontier_coverage_1/group_std_mean": 0.2270788460969925, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03581186383962631, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025350698735564945, "signal/frontier_coverage_10/centered_abs_mean": 0.07169679552316666, "signal/frontier_coverage_10/group_std_mean": 0.09079298973083497, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014486683905124665, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010252641513943672, "signal/frontier_coverage_15/centered_abs_mean": 0.07800347357988358, "signal/frontier_coverage_15/group_std_mean": 0.09790896475315095, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015783482789993288, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011154496809467674, "signal/frontier_coverage_20/centered_abs_mean": 0.10981054455041886, "signal/frontier_coverage_20/group_std_mean": 0.1392547756433487, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02221333757042885, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015702907461673022, "signal/frontier_coverage_25/centered_abs_mean": 0.1499340057373047, "signal/frontier_coverage_25/group_std_mean": 0.19064950346946716, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03032132089138031, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021440562792122363, "signal/frontier_coverage_5/centered_abs_mean": 0.17626629769802094, "signal/frontier_coverage_5/group_std_mean": 0.22582550942897797, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03560806550085545, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002520608017221093, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32504919171333313, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3949630320072174, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4604054570198059, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032504919171333316, "step": 530 }, { "calibration/aurc": 0.05840357354887128, "calibration/batch_distribution_entropy": 0.9378896107732835, "calibration/buffer_distribution_entropy": 0.9803632250109828, "calibration/confidence_entropy": 0.4649623520355387, "calibration/coverage@0%": 0.1630159388228418, "calibration/coverage@1%": 0.26672793130768035, "calibration/coverage@10%": 0.7872173597822631, "calibration/coverage@15%": 0.8644275603071581, "calibration/coverage@20%": 0.9390392566484804, "calibration/coverage@25%": 0.9847320651498197, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.7084764103084508, "calibration/ece": 0.1835135729808905, "calibration/mean_confidence": 0.6055113479510223, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009809027777777767, "completions/max_length": 3855.2, "completions/max_terminated_length": 3855.2, "completions/mean_length": 789.052001953125, "completions/mean_terminated_length": 797.0057495117187, "completions/min_length": 0.0, "completions/min_terminated_length": 133.4, "epoch": 1.2855964300446243, "grad_norm": 0.0027481773868203163, "learning_rate": 3.0348557692307694e-06, "loss": -0.0222, "num_tokens": 1387338630.0, "reward": 1.0056124329566956, "reward_std": 0.11937729865312577, "rewards/accuracy_reward": 0.712499988079071, "rewards/brier_reward": 0.8213678240776062, "rewards/confidence_uniqueness_reward": 0.9392488479614258, "rewards/format_reward": 0.9901041626930237, "rewards/frontier_coverage_0": 0.022314731776714326, "rewards/frontier_coverage_1": 0.022314731776714326, "rewards/frontier_coverage_10": 0.04989167377352714, "rewards/frontier_coverage_15": 0.1045038491487503, "rewards/frontier_coverage_20": 0.17718027234077455, "rewards/frontier_coverage_25": 0.2612395048141479, "rewards/frontier_coverage_5": 0.022799749858677387, "rewards/frontier_entropy_batch_reward": -0.31192846298217775, "signal/accuracy_reward/centered_abs_mean": 0.1269965276122093, "signal/accuracy_reward/group_std_mean": 0.17362678050994873, "signal/accuracy_reward/group_zero_std_frac": 0.4888889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9380814790725708, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06349826380610465, "signal/advantage_abs_mean": 0.7406193017959595, "signal/advantage_pre_scale_abs_mean": 0.08632948994636536, "signal/advantage_pre_scale_std": 0.14735422730445863, "signal/advantage_std": 0.9830026030540466, "signal/brier_reward/centered_abs_mean": 0.11631175279617309, "signal/brier_reward/group_std_mean": 0.15423674881458282, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17220796644687653, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011631175130605697, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029228382930159568, "signal/confidence_uniqueness_reward/group_std_mean": 0.04904755130410195, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0437807485461235, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029228384140878917, "signal/format_reward/centered_abs_mean": 0.017263454757630824, "signal/format_reward/group_std_mean": 0.034723594039678576, "signal/format_reward/group_zero_std_frac": 0.85, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.13021927326917648, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.008631727378815412, "signal/frontier_coverage_0/centered_abs_mean": 0.15657298862934113, "signal/frontier_coverage_0/group_std_mean": 0.2082358866930008, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0331398393958807, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022389938123524187, "signal/frontier_coverage_1/centered_abs_mean": 0.15657298862934113, "signal/frontier_coverage_1/group_std_mean": 0.2082358866930008, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0331398393958807, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022389938123524187, "signal/frontier_coverage_10/centered_abs_mean": 0.07481402903795242, "signal/frontier_coverage_10/group_std_mean": 0.09677753150463105, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015874645672738552, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001069840556010604, "signal/frontier_coverage_15/centered_abs_mean": 0.07884364426136017, "signal/frontier_coverage_15/group_std_mean": 0.09854675233364105, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016783738508820534, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011274641146883368, "signal/frontier_coverage_20/centered_abs_mean": 0.11144341826438904, "signal/frontier_coverage_20/group_std_mean": 0.1409495711326599, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023704275116324426, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001593640772625804, "signal/frontier_coverage_25/centered_abs_mean": 0.15060859620571138, "signal/frontier_coverage_25/group_std_mean": 0.1917654901742935, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03201264552772045, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021537028020247816, "signal/frontier_coverage_5/centered_abs_mean": 0.1554807960987091, "signal/frontier_coverage_5/group_std_mean": 0.20682709217071532, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03290844485163689, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022233754862099886, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32440600991249086, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3896322250366211, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48102996349334715, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03244060054421425, "step": 535 }, { "calibration/aurc": 0.13244185631787503, "calibration/batch_distribution_entropy": 0.9800974535080833, "calibration/buffer_distribution_entropy": 0.9808648778370616, "calibration/confidence_entropy": 0.4809548343091509, "calibration/coverage@0%": 0.14100108636490322, "calibration/coverage@1%": 0.1484478948755415, "calibration/coverage@10%": 0.490198095221616, "calibration/coverage@15%": 0.6693744131198277, "calibration/coverage@20%": 0.7559760305725088, "calibration/coverage@25%": 0.8143260634639944, "calibration/coverage@30%": 0.8785299372239944, "calibration/coverage@5%": 0.3187934513805214, "calibration/ece": 0.17710937135755084, "calibration/mean_confidence": 0.4955213287013769, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020572916666666673, "completions/max_length": 3891.2, "completions/max_terminated_length": 3891.2, "completions/mean_length": 809.2749145507812, "completions/mean_terminated_length": 826.44189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 93.8, "epoch": 1.2975962800464993, "grad_norm": 0.0025462766643613577, "learning_rate": 3.0048076923076923e-06, "loss": -0.0439, "num_tokens": 1399767973.0, "reward": 1.0000099539756775, "reward_std": 0.1379528284072876, "rewards/accuracy_reward": 0.7075520634651185, "rewards/brier_reward": 0.8036871314048767, "rewards/confidence_uniqueness_reward": 0.9327346086502075, "rewards/format_reward": 0.9793402791023255, "rewards/frontier_coverage_0": 0.017869478557258844, "rewards/frontier_coverage_1": 0.017869478557258844, "rewards/frontier_coverage_10": 0.0430420383810997, "rewards/frontier_coverage_15": 0.09954206198453903, "rewards/frontier_coverage_20": 0.17041370272636414, "rewards/frontier_coverage_25": 0.25180783569812776, "rewards/frontier_coverage_5": 0.018168425746262075, "rewards/frontier_entropy_batch_reward": -0.2592599123716354, "signal/accuracy_reward/centered_abs_mean": 0.14659830629825593, "signal/accuracy_reward/group_std_mean": 0.1926487445831299, "signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.014528787136078, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07329915314912797, "signal/advantage_abs_mean": 0.7407657027244567, "signal/advantage_pre_scale_abs_mean": 0.10063390731811524, "signal/advantage_pre_scale_std": 0.17158576846122742, "signal/advantage_std": 0.9831032276153564, "signal/brier_reward/centered_abs_mean": 0.134641233086586, "signal/brier_reward/group_std_mean": 0.17501663267612458, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18607415556907653, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013464123010635376, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042544426023960115, "signal/confidence_uniqueness_reward/group_std_mean": 0.07108568400144577, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.058684717118740085, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004254442779347301, "signal/format_reward/centered_abs_mean": 0.03412543423473835, "signal/format_reward/group_std_mean": 0.06144065707921982, "signal/format_reward/group_zero_std_frac": 0.7555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.23496909141540528, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017062717117369175, "signal/frontier_coverage_0/centered_abs_mean": 0.19530532956123353, "signal/frontier_coverage_0/group_std_mean": 0.25145215094089507, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03858583122491836, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002792866202071309, "signal/frontier_coverage_1/centered_abs_mean": 0.19530532956123353, "signal/frontier_coverage_1/group_std_mean": 0.25145215094089507, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03858583122491836, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002792866202071309, "signal/frontier_coverage_10/centered_abs_mean": 0.07514613270759582, "signal/frontier_coverage_10/group_std_mean": 0.09620828628540039, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014837125316262245, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001074589730706066, "signal/frontier_coverage_15/centered_abs_mean": 0.07365463823080062, "signal/frontier_coverage_15/group_std_mean": 0.09266743957996368, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014589322358369827, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010532613145187498, "signal/frontier_coverage_20/centered_abs_mean": 0.10024979412555694, "signal/frontier_coverage_20/group_std_mean": 0.12663647830486296, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01986866146326065, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014335720334202051, "signal/frontier_coverage_25/centered_abs_mean": 0.1359792798757553, "signal/frontier_coverage_25/group_std_mean": 0.17195332646369935, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02694905437529087, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019445037469267845, "signal/frontier_coverage_5/centered_abs_mean": 0.19388082921504973, "signal/frontier_coverage_5/group_std_mean": 0.24967800378799437, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03830417841672897, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002772495849058032, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31604220271110534, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38787181973457335, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.43719064593315127, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03160422146320343, "step": 540 }, { "calibration/aurc": 0.19371810539249967, "calibration/batch_distribution_entropy": 0.9582393810078139, "calibration/buffer_distribution_entropy": 0.9807477018011488, "calibration/confidence_entropy": 0.48479958020192343, "calibration/coverage@0%": 0.009410442493643909, "calibration/coverage@1%": 0.009410442493643909, "calibration/coverage@10%": 0.2788743955445861, "calibration/coverage@15%": 0.41693663961923677, "calibration/coverage@20%": 0.5085180756830296, "calibration/coverage@25%": 0.7402995306519997, "calibration/coverage@30%": 0.8931141066789896, "calibration/coverage@5%": 0.10086368189135078, "calibration/ece": 0.12323604987285137, "calibration/mean_confidence": 0.5889928364967711, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007204861111111116, "completions/max_length": 3744.0, "completions/max_terminated_length": 3744.0, "completions/mean_length": 740.11953125, "completions/mean_terminated_length": 745.5853881835938, "completions/min_length": 0.0, "completions/min_terminated_length": 137.0, "epoch": 1.3095961300483743, "grad_norm": 0.002864128677174449, "learning_rate": 2.974759615384616e-06, "loss": -0.0146, "num_tokens": 1411338950.0, "reward": 0.9936476588249207, "reward_std": 0.12532853931188584, "rewards/accuracy_reward": 0.6754340291023254, "rewards/brier_reward": 0.8261975884437561, "rewards/confidence_uniqueness_reward": 0.9434968113899231, "rewards/format_reward": 0.9927083373069763, "rewards/frontier_coverage_0": 0.04778345115482807, "rewards/frontier_coverage_1": 0.04778345115482807, "rewards/frontier_coverage_10": 0.05499168708920479, "rewards/frontier_coverage_15": 0.10132022351026534, "rewards/frontier_coverage_20": 0.1685120642185211, "rewards/frontier_coverage_25": 0.244861963391304, "rewards/frontier_coverage_5": 0.047942586988210675, "rewards/frontier_entropy_batch_reward": -0.27591673135757444, "signal/accuracy_reward/centered_abs_mean": 0.14527452290058135, "signal/accuracy_reward/group_std_mean": 0.19745134711265563, "signal/accuracy_reward/group_zero_std_frac": 0.4083333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0105983018875122, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07263726145029067, "signal/advantage_abs_mean": 0.7285477638244628, "signal/advantage_pre_scale_abs_mean": 0.09042828679084777, "signal/advantage_pre_scale_std": 0.14941135048866272, "signal/advantage_std": 0.9830885171890259, "signal/brier_reward/centered_abs_mean": 0.11565537899732589, "signal/brier_reward/group_std_mean": 0.15386753976345063, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16175343692302704, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011565538495779038, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025201234966516495, "signal/confidence_uniqueness_reward/group_std_mean": 0.044658108800649646, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03534325771033764, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025201234966516495, "signal/format_reward/centered_abs_mean": 0.013400607462972402, "signal/format_reward/group_std_mean": 0.03037625327706337, "signal/format_reward/group_zero_std_frac": 0.8555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09462228938937187, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006700303731486201, "signal/frontier_coverage_0/centered_abs_mean": 0.16465333700180054, "signal/frontier_coverage_0/group_std_mean": 0.21388448178768157, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032869096100330356, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002354542585089803, "signal/frontier_coverage_1/centered_abs_mean": 0.16465333700180054, "signal/frontier_coverage_1/group_std_mean": 0.21388448178768157, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032869096100330356, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002354542585089803, "signal/frontier_coverage_10/centered_abs_mean": 0.06270370185375214, "signal/frontier_coverage_10/group_std_mean": 0.08065424412488938, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012526192888617515, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008966629044152796, "signal/frontier_coverage_15/centered_abs_mean": 0.07721384763717651, "signal/frontier_coverage_15/group_std_mean": 0.09745251387357712, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015420524403452873, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011041580000892282, "signal/frontier_coverage_20/centered_abs_mean": 0.11166613698005676, "signal/frontier_coverage_20/group_std_mean": 0.14150880575180053, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022289788722991942, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001596825709566474, "signal/frontier_coverage_25/centered_abs_mean": 0.15288293361663818, "signal/frontier_coverage_25/group_std_mean": 0.19415551722049712, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03050895407795906, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021862258901819585, "signal/frontier_coverage_5/centered_abs_mean": 0.1633177638053894, "signal/frontier_coverage_5/group_std_mean": 0.21221773326396942, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032602763175964354, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002335443953052163, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3143535256385803, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3824365258216858, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4391070544719696, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03143535330891609, "step": 545 }, { "calibration/aurc": 0.0995656417360469, "calibration/batch_distribution_entropy": 0.938787714013453, "calibration/buffer_distribution_entropy": 0.9802957592257414, "calibration/confidence_entropy": 0.4762113632355807, "calibration/coverage@0%": 0.06886015541822894, "calibration/coverage@1%": 0.20479765541822897, "calibration/coverage@10%": 0.6694310362170939, "calibration/coverage@15%": 0.7894442341243252, "calibration/coverage@20%": 0.8726531096446104, "calibration/coverage@25%": 0.9322158247251984, "calibration/coverage@30%": 0.9687002652519894, "calibration/coverage@5%": 0.33284931091946046, "calibration/ece": 0.13048564570174642, "calibration/mean_confidence": 0.6237522141039131, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555536, "completions/max_length": 3780.4, "completions/max_terminated_length": 3780.4, "completions/mean_length": 732.2462646484375, "completions/mean_terminated_length": 735.1727905273438, "completions/min_length": 0.0, "completions/min_terminated_length": 107.4, "epoch": 1.3215959800502493, "grad_norm": 0.002857534447684884, "learning_rate": 2.9447115384615386e-06, "loss": -0.0009, "num_tokens": 1422844827.0, "reward": 1.0154416918754579, "reward_std": 0.11865588426589965, "rewards/accuracy_reward": 0.723437511920929, "rewards/brier_reward": 0.8295330762863159, "rewards/confidence_uniqueness_reward": 0.944782269001007, "rewards/format_reward": 0.9960069417953491, "rewards/frontier_coverage_0": 0.014874590956605972, "rewards/frontier_coverage_1": 0.014878203091211618, "rewards/frontier_coverage_10": 0.04686977192759514, "rewards/frontier_coverage_15": 0.10758093893527984, "rewards/frontier_coverage_20": 0.1838034689426422, "rewards/frontier_coverage_25": 0.26971648037433626, "rewards/frontier_coverage_5": 0.01526981797069311, "rewards/frontier_entropy_batch_reward": -0.3104989051818848, "signal/accuracy_reward/centered_abs_mean": 0.1535481721162796, "signal/accuracy_reward/group_std_mean": 0.19500392973423003, "signal/accuracy_reward/group_zero_std_frac": 0.46666666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.110567831993103, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0767740860581398, "signal/advantage_abs_mean": 0.7761712193489074, "signal/advantage_pre_scale_abs_mean": 0.0919352874159813, "signal/advantage_pre_scale_std": 0.143514946103096, "signal/advantage_std": 0.9830339431762696, "signal/brier_reward/centered_abs_mean": 0.11515444964170456, "signal/brier_reward/group_std_mean": 0.14821325838565827, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16740451157093048, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011515445262193679, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02092781737446785, "signal/confidence_uniqueness_reward/group_std_mean": 0.033663921803236005, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03046945817768574, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002092781872488558, "signal/format_reward/centered_abs_mean": 0.007443576492369175, "signal/format_reward/group_std_mean": 0.01733107175678015, "signal/format_reward/group_zero_std_frac": 0.9166666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05412430316209793, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0037217882461845877, "signal/frontier_coverage_0/centered_abs_mean": 0.16963888108730316, "signal/frontier_coverage_0/group_std_mean": 0.21692575812339782, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03522733971476555, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024258360732346772, "signal/frontier_coverage_1/centered_abs_mean": 0.16954942643642426, "signal/frontier_coverage_1/group_std_mean": 0.2168179452419281, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035208532214164735, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002424556901678443, "signal/frontier_coverage_10/centered_abs_mean": 0.06202037930488587, "signal/frontier_coverage_10/group_std_mean": 0.07880822569131851, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012900578789412975, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008868913981132209, "signal/frontier_coverage_15/centered_abs_mean": 0.07617910951375961, "signal/frontier_coverage_15/group_std_mean": 0.0960146278142929, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015834695287048818, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010893612634390593, "signal/frontier_coverage_20/centered_abs_mean": 0.10935810059309006, "signal/frontier_coverage_20/group_std_mean": 0.1381034791469574, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022717427089810373, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015638208482414485, "signal/frontier_coverage_25/centered_abs_mean": 0.14982065558433533, "signal/frontier_coverage_25/group_std_mean": 0.18926362693309784, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031108209863305092, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021424354054033756, "signal/frontier_coverage_5/centered_abs_mean": 0.1681896448135376, "signal/frontier_coverage_5/group_std_mean": 0.2151541143655777, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03492706418037415, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024051119573414324, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32938060760498045, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3938984632492065, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4788420915603638, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032938060164451596, "step": 550 }, { "epoch": 1.3215959800502493, "eval_calibration/aurc": 0.12777494995730462, "eval_calibration/batch_distribution_entropy": 0.9146714874440477, "eval_calibration/buffer_distribution_entropy": 0.97970379700712, "eval_calibration/confidence_entropy": 0.4720832389869453, "eval_calibration/coverage@0%": 0.2565524193548387, "eval_calibration/coverage@1%": 0.2565524193548387, "eval_calibration/coverage@10%": 0.5593077956989247, "eval_calibration/coverage@15%": 0.679771505376344, "eval_calibration/coverage@20%": 0.7795698924731184, "eval_calibration/coverage@25%": 0.9163306451612904, "eval_calibration/coverage@30%": 0.9739583333333334, "eval_calibration/coverage@5%": 0.2878024193548387, "eval_calibration/ece": 0.19415833333333332, "eval_calibration/mean_confidence": 0.5893913306451612, "eval_completions/clipped_ratio": 0.002604166666666685, "eval_completions/max_length": 2354.5, "eval_completions/max_terminated_length": 2354.5, "eval_completions/mean_length": 758.33837890625, "eval_completions/mean_terminated_length": 760.3116963704427, "eval_completions/min_length": 99.83333333333333, "eval_completions/min_terminated_length": 185.16666666666666, "eval_loss": 0.0, "eval_num_tokens": 1422844827.0, "eval_reward": 0.9168184598286947, "eval_reward_std": 0.23817753295103708, "eval_rewards/accuracy_reward": 0.6710069477558136, "eval_rewards/brier_reward": 0.830171674489975, "eval_rewards/confidence_uniqueness_reward": 0.8910415371259054, "eval_rewards/format_reward": 0.9973958432674408, "eval_rewards/frontier_coverage_0": 0.04973413205395142, "eval_rewards/frontier_coverage_1": 0.049713826117416225, "eval_rewards/frontier_coverage_10": 0.051547558357318245, "eval_rewards/frontier_coverage_15": 0.10180203368266423, "eval_rewards/frontier_coverage_20": 0.1691055049498876, "eval_rewards/frontier_coverage_25": 0.24416544288396835, "eval_rewards/frontier_coverage_5": 0.049687957080701985, "eval_rewards/frontier_entropy_batch_reward": -0.9973958432674408, "eval_runtime": 166.8536, "eval_samples_per_second": 5.993, "eval_signal/accuracy_reward/centered_abs_mean": 0.4271375884612401, "eval_signal/accuracy_reward/group_std_mean": 0.46825483938058216, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9032614231109619, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.21356879423062006, "eval_signal/advantage_abs_mean": 0.8906104365984598, "eval_signal/advantage_pre_scale_abs_mean": 0.21314153323570886, "eval_signal/advantage_pre_scale_std": 0.23594039926926294, "eval_signal/advantage_std": 0.9864058494567871, "eval_signal/brier_reward/centered_abs_mean": 0.16124566892782846, "eval_signal/brier_reward/group_std_mean": 0.21817840884129205, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0682522679368655, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01612456701695919, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04524739272892475, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.060582934568325676, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019175103555123012, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004524739536767204, "eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/format_reward/group_std_mean": 0.014731391333043575, "eval_signal/format_reward/group_zero_std_frac": 0.9166666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010400833562016487, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2642584939797719, "eval_signal/frontier_coverage_0/group_std_mean": 0.36403438945611316, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016022339463233948, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037788964497546353, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.26409488171339035, "eval_signal/frontier_coverage_1/group_std_mean": 0.36384013791879016, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016012390454610188, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003776557006252309, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.0783597007393837, "eval_signal/frontier_coverage_10/group_std_mean": 0.10474599276979764, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004751801490783691, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011205436894670129, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.12035289034247398, "eval_signal/frontier_coverage_15/group_std_mean": 0.1514952356616656, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007285447558388114, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001721046263507257, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.20810386538505554, "eval_signal/frontier_coverage_20/group_std_mean": 0.2537065049012502, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012596863321959972, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029758852906525135, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3042859335740407, "eval_signal/frontier_coverage_25/group_std_mean": 0.366447314620018, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018416117566327255, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004351288701097171, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2619953279693921, "eval_signal/frontier_coverage_5/group_std_mean": 0.361324484149615, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015885391427824896, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003746533145507177, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0020801667124032974, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045572955471774, "eval_steps_per_second": 0.036, "step": 550 }, { "epoch": 1.3215959800502493, "step": 550, "train_probe_calibration/aurc": 0.17908947204745143, "train_probe_calibration/batch_distribution_entropy": 0.9130333424554435, "train_probe_calibration/buffer_distribution_entropy": 0.9798709378353916, "train_probe_calibration/confidence_entropy": 0.49644389175607556, "train_probe_calibration/coverage@0%": 0.17361111111111113, "train_probe_calibration/coverage@1%": 0.17361111111111113, "train_probe_calibration/coverage@10%": 0.38472222222222224, "train_probe_calibration/coverage@15%": 0.6371527777777778, "train_probe_calibration/coverage@20%": 0.7107638888888889, "train_probe_calibration/coverage@25%": 0.8312499999999999, "train_probe_calibration/coverage@30%": 0.9409722222222222, "train_probe_calibration/coverage@5%": 0.2152777777777778, "train_probe_calibration/ece": 0.20402147569444443, "train_probe_calibration/mean_confidence": 0.5926106076388887, "train_probe_completions/clipped_ratio": 0.0026041666666666665, "train_probe_completions/max_length": 2099.6666666666665, "train_probe_completions/max_terminated_length": 2099.6666666666665, "train_probe_completions/mean_length": 734.828135172526, "train_probe_completions/mean_terminated_length": 736.7092793782552, "train_probe_completions/min_length": 105.83333333333333, "train_probe_completions/min_terminated_length": 148.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 1422844827.0, "train_probe_reward": 0.9271653195222219, "train_probe_reward_std": 0.23420592894156775, "train_probe_rewards/accuracy_reward": 0.6935763955116272, "train_probe_rewards/brier_reward": 0.8282076120376587, "train_probe_rewards/confidence_uniqueness_reward": 0.8932102719942728, "train_probe_rewards/format_reward": 0.996527781089147, "train_probe_rewards/frontier_coverage_0": 0.03307745155567924, "train_probe_rewards/frontier_coverage_1": 0.033096089803924165, "train_probe_rewards/frontier_coverage_10": 0.04708883538842201, "train_probe_rewards/frontier_coverage_15": 0.10199795787533124, "train_probe_rewards/frontier_coverage_20": 0.17264153808355331, "train_probe_rewards/frontier_coverage_25": 0.2519413009285927, "train_probe_rewards/frontier_coverage_5": 0.03317807226752242, "train_probe_rewards/frontier_entropy_batch_reward": -0.996527781089147, "train_probe_runtime": 161.636, "train_probe_samples_per_second": 6.187, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.4127061615387599, "train_probe_signal/accuracy_reward/group_std_mean": 0.4601968179146449, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8917946914831797, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.20635308076937994, "train_probe_signal/advantage_abs_mean": 0.8753375907739004, "train_probe_signal/advantage_pre_scale_abs_mean": 0.20616419365008673, "train_probe_signal/advantage_pre_scale_std": 0.2326151430606842, "train_probe_signal/advantage_std": 0.9863971670468649, "train_probe_signal/brier_reward/centered_abs_mean": 0.16185809671878815, "train_probe_signal/brier_reward/group_std_mean": 0.2190844938158989, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07006527669727802, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.016185809237261612, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04507234009603659, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05927520431578159, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01946852883944909, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045072339319934445, "train_probe_signal/format_reward/centered_abs_mean": 0.006618923507630825, "train_probe_signal/format_reward/group_std_mean": 0.01665244624018669, "train_probe_signal/format_reward/group_zero_std_frac": 0.9166666865348816, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.013900812404851118, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0033094617538154125, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2572428708275159, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.36240187784036, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015925037519385416, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036785730238383016, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2570945918560028, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.3622182110945384, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01591583030919234, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003676452557556331, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07643753911058108, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.1038547232747078, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004740113392472267, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010930567999215175, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11760762209693591, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.14834488679965338, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0072829612375547486, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016817889603165288, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.20278030882279077, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.247420996427536, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012549723964184523, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002899758517742157, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.29607370992501575, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.35755864282449085, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018319410582383473, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004233853969102104, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2552005996306737, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3598967989285787, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015798571209112804, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036493684941281876, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.006618923507630825, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.01665244624018669, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0027801623412718377, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0006618923507630825, "train_probe_steps_per_second": 0.037 }, { "calibration/aurc": 0.19663234235869248, "calibration/batch_distribution_entropy": 0.9651647751097935, "calibration/buffer_distribution_entropy": 0.979815394719828, "calibration/confidence_entropy": 0.4981850665533334, "calibration/coverage@0%": 0.019349941668759134, "calibration/coverage@1%": 0.019349941668759134, "calibration/coverage@10%": 0.3412770994600119, "calibration/coverage@15%": 0.5117494931015768, "calibration/coverage@20%": 0.5975047467954091, "calibration/coverage@25%": 0.6728722988900216, "calibration/coverage@30%": 0.7757951279144336, "calibration/coverage@5%": 0.06757603701182885, "calibration/ece": 0.14719846197953987, "calibration/mean_confidence": 0.5519945923747531, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003298611111111116, "completions/max_length": 3510.8, "completions/max_terminated_length": 3510.8, "completions/mean_length": 757.6384521484375, "completions/mean_terminated_length": 760.1795532226563, "completions/min_length": 0.0, "completions/min_terminated_length": 145.0, "epoch": 1.3335958300521242, "grad_norm": 0.0024476582184433937, "learning_rate": 2.9146634615384615e-06, "loss": -0.0037, "num_tokens": 1434677462.0, "reward": 1.0103063583374023, "reward_std": 0.10284363478422165, "rewards/accuracy_reward": 0.7134548544883728, "rewards/brier_reward": 0.8260629892349243, "rewards/confidence_uniqueness_reward": 0.9457016348838806, "rewards/format_reward": 0.9967013835906983, "rewards/frontier_coverage_0": 0.023881956841796635, "rewards/frontier_coverage_1": 0.023881740309298037, "rewards/frontier_coverage_10": 0.048338998854160306, "rewards/frontier_coverage_15": 0.10219060182571411, "rewards/frontier_coverage_20": 0.1739596724510193, "rewards/frontier_coverage_25": 0.2564005136489868, "rewards/frontier_coverage_5": 0.024192382209002973, "rewards/frontier_entropy_batch_reward": -0.31283934116363527, "signal/accuracy_reward/centered_abs_mean": 0.12223849892616272, "signal/accuracy_reward/group_std_mean": 0.1589464396238327, "signal/accuracy_reward/group_zero_std_frac": 0.55277778506279, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9898079633712769, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06111924946308136, "signal/advantage_abs_mean": 0.7705781579017639, "signal/advantage_pre_scale_abs_mean": 0.07884201258420945, "signal/advantage_pre_scale_std": 0.1269924134016037, "signal/advantage_std": 0.9828511595726013, "signal/brier_reward/centered_abs_mean": 0.11215368509292603, "signal/brier_reward/group_std_mean": 0.1447371155023575, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18304523229598998, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011215368844568729, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01934829242527485, "signal/confidence_uniqueness_reward/group_std_mean": 0.02940821126103401, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03181086927652359, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019348292844370008, "signal/format_reward/centered_abs_mean": 0.0060004339087754485, "signal/format_reward/group_std_mean": 0.012914158403873444, "signal/format_reward/group_zero_std_frac": 0.9416666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04921326451003551, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0030002169543877242, "signal/frontier_coverage_0/centered_abs_mean": 0.16506983935832978, "signal/frontier_coverage_0/group_std_mean": 0.2103798657655716, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03838563859462738, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023604985792189835, "signal/frontier_coverage_1/centered_abs_mean": 0.1649801552295685, "signal/frontier_coverage_1/group_std_mean": 0.21026895344257354, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038364893198013304, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002359216194599867, "signal/frontier_coverage_10/centered_abs_mean": 0.06287136897444726, "signal/frontier_coverage_10/group_std_mean": 0.07955214679241181, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014653045125305653, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008990605594590307, "signal/frontier_coverage_15/centered_abs_mean": 0.07382966876029969, "signal/frontier_coverage_15/group_std_mean": 0.09216237664222718, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01734896432608366, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010557642672210932, "signal/frontier_coverage_20/centered_abs_mean": 0.10129383057355881, "signal/frontier_coverage_20/group_std_mean": 0.12719354182481765, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023834266886115073, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014485017396509647, "signal/frontier_coverage_25/centered_abs_mean": 0.13603402376174928, "signal/frontier_coverage_25/group_std_mean": 0.17175181806087494, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031987834721803665, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019452865933999419, "signal/frontier_coverage_5/centered_abs_mean": 0.1638639748096466, "signal/frontier_coverage_5/group_std_mean": 0.20887594819068908, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03810485303401947, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023432548390701414, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3287335276603699, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3981877684593201, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5402589082717896, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03287335261702538, "step": 555 }, { "calibration/aurc": 0.12372041385720985, "calibration/batch_distribution_entropy": 0.9359188147593194, "calibration/buffer_distribution_entropy": 0.9801759788746939, "calibration/confidence_entropy": 0.46966744545122846, "calibration/coverage@0%": 0.022477618761264504, "calibration/coverage@1%": 0.07574132633306607, "calibration/coverage@10%": 0.5220841856664131, "calibration/coverage@15%": 0.6662842303817766, "calibration/coverage@20%": 0.8679514517074074, "calibration/coverage@25%": 0.9368390992167102, "calibration/coverage@30%": 0.9577023498694517, "calibration/coverage@5%": 0.22553054521905386, "calibration/ece": 0.1454074541603872, "calibration/mean_confidence": 0.620010325011976, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003993055555555558, "completions/max_length": 3796.8, "completions/max_terminated_length": 3796.8, "completions/mean_length": 820.9757080078125, "completions/mean_terminated_length": 824.318359375, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 1.3455956800539992, "grad_norm": 0.0024596690200269222, "learning_rate": 2.8846153846153845e-06, "loss": 0.0007, "num_tokens": 1447224798.0, "reward": 1.016961658000946, "reward_std": 0.1075833186507225, "rewards/accuracy_reward": 0.7207465171813965, "rewards/brier_reward": 0.8354118824005127, "rewards/confidence_uniqueness_reward": 0.945723009109497, "rewards/format_reward": 0.9960069537162781, "rewards/frontier_coverage_0": 0.031797058135271075, "rewards/frontier_coverage_1": 0.03180254213511944, "rewards/frontier_coverage_10": 0.05350678041577339, "rewards/frontier_coverage_15": 0.11030341684818268, "rewards/frontier_coverage_20": 0.1851776123046875, "rewards/frontier_coverage_25": 0.2708742439746857, "rewards/frontier_coverage_5": 0.0322908416390419, "rewards/frontier_entropy_batch_reward": -0.29763842225074766, "signal/accuracy_reward/centered_abs_mean": 0.1304633229970932, "signal/accuracy_reward/group_std_mean": 0.1730515480041504, "signal/accuracy_reward/group_zero_std_frac": 0.5000000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0257725954055785, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0652316614985466, "signal/advantage_abs_mean": 0.7648744463920594, "signal/advantage_pre_scale_abs_mean": 0.08146732598543167, "signal/advantage_pre_scale_std": 0.1303176298737526, "signal/advantage_std": 0.982902467250824, "signal/brier_reward/centered_abs_mean": 0.11421165615320206, "signal/brier_reward/group_std_mean": 0.14783188402652742, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18043694496154786, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01142116542905569, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02011672258377075, "signal/confidence_uniqueness_reward/group_std_mean": 0.030361873283982276, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03180941939353943, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020116724306717517, "signal/format_reward/centered_abs_mean": 0.006575520941987633, "signal/format_reward/group_std_mean": 0.01385612040758133, "signal/format_reward/group_zero_std_frac": 0.9361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05119709745049476, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0032877604709938167, "signal/frontier_coverage_0/centered_abs_mean": 0.1728944033384323, "signal/frontier_coverage_0/group_std_mean": 0.22030304074287416, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03905735611915588, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002472389955073595, "signal/frontier_coverage_1/centered_abs_mean": 0.172818061709404, "signal/frontier_coverage_1/group_std_mean": 0.22020695805549623, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03904041945934296, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002471298351883888, "signal/frontier_coverage_10/centered_abs_mean": 0.06365747526288032, "signal/frontier_coverage_10/group_std_mean": 0.08029326051473618, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014403184317052365, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009103018906898797, "signal/frontier_coverage_15/centered_abs_mean": 0.07621516734361648, "signal/frontier_coverage_15/group_std_mean": 0.09550768882036209, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017270967923104764, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010898768436163665, "signal/frontier_coverage_20/centered_abs_mean": 0.10459608137607575, "signal/frontier_coverage_20/group_std_mean": 0.13220926523208618, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02368568480014801, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001495723961852491, "signal/frontier_coverage_25/centered_abs_mean": 0.13965673446655275, "signal/frontier_coverage_25/group_std_mean": 0.17762815058231354, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031598026677966115, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019970911787822844, "signal/frontier_coverage_5/centered_abs_mean": 0.17128887474536897, "signal/frontier_coverage_5/group_std_mean": 0.2183428555727005, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038694722950458525, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024494309443980457, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3275206506252289, "signal/frontier_entropy_batch_reward/group_std_mean": 0.393954998254776, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5186434030532837, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032752066105604175, "step": 560 }, { "calibration/aurc": 0.11484324392938003, "calibration/batch_distribution_entropy": 0.9461764735370636, "calibration/buffer_distribution_entropy": 0.9800952762111569, "calibration/confidence_entropy": 0.49656747207777396, "calibration/coverage@0%": 0.13764694219018028, "calibration/coverage@1%": 0.14077194219018027, "calibration/coverage@10%": 0.34883286118752554, "calibration/coverage@15%": 0.8183734403223412, "calibration/coverage@20%": 0.944526908026836, "calibration/coverage@25%": 0.9843204977967833, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.1814599271535877, "calibration/ece": 0.18485625489936197, "calibration/mean_confidence": 0.6121328205199128, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004947916666666652, "completions/max_length": 3722.8, "completions/max_terminated_length": 3722.8, "completions/mean_length": 829.382568359375, "completions/mean_terminated_length": 833.634375, "completions/min_length": 0.0, "completions/min_terminated_length": 143.0, "epoch": 1.3575955300558742, "grad_norm": 0.0024535465054214, "learning_rate": 2.8545673076923082e-06, "loss": -0.011, "num_tokens": 1459857813.0, "reward": 1.01730078458786, "reward_std": 0.10536360442638397, "rewards/accuracy_reward": 0.7284722328186035, "rewards/brier_reward": 0.8310370564460754, "rewards/confidence_uniqueness_reward": 0.9439226388931274, "rewards/format_reward": 0.9947048664093018, "rewards/frontier_coverage_0": 0.017816638201475145, "rewards/frontier_coverage_1": 0.017861245200037956, "rewards/frontier_coverage_10": 0.047358321771025655, "rewards/frontier_coverage_15": 0.10612278282642365, "rewards/frontier_coverage_20": 0.18106147646903992, "rewards/frontier_coverage_25": 0.2659234404563904, "rewards/frontier_coverage_5": 0.018120815977454185, "rewards/frontier_entropy_batch_reward": -0.3113971471786499, "signal/accuracy_reward/centered_abs_mean": 0.11510416865348816, "signal/accuracy_reward/group_std_mean": 0.1561041682958603, "signal/accuracy_reward/group_zero_std_frac": 0.5361111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8854737877845764, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05755208432674408, "signal/advantage_abs_mean": 0.7564275026321411, "signal/advantage_pre_scale_abs_mean": 0.07891413271427154, "signal/advantage_pre_scale_std": 0.12910031527280807, "signal/advantage_std": 0.9829351425170898, "signal/brier_reward/centered_abs_mean": 0.10529440641403198, "signal/brier_reward/group_std_mean": 0.13557455837726592, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16216840744018554, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010529440827667713, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02240469716489315, "signal/confidence_uniqueness_reward/group_std_mean": 0.03349938876926899, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034209462255239485, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022404698422178624, "signal/format_reward/centered_abs_mean": 0.009195963526144624, "signal/format_reward/group_std_mean": 0.01742637250572443, "signal/format_reward/group_zero_std_frac": 0.9277777671813965, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06746506839990615, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004597981763072312, "signal/frontier_coverage_0/centered_abs_mean": 0.15199373960494994, "signal/frontier_coverage_0/group_std_mean": 0.19706369042396546, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033572905138134955, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021735104732215405, "signal/frontier_coverage_1/centered_abs_mean": 0.15193078815937042, "signal/frontier_coverage_1/group_std_mean": 0.19698142111301423, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03355920016765594, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021726103965193032, "signal/frontier_coverage_10/centered_abs_mean": 0.05758165866136551, "signal/frontier_coverage_10/group_std_mean": 0.07300378382205963, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012791383638978004, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008234177017584443, "signal/frontier_coverage_15/centered_abs_mean": 0.07520065009593964, "signal/frontier_coverage_15/group_std_mean": 0.0933651715517044, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016745933331549168, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010753692826256157, "signal/frontier_coverage_20/centered_abs_mean": 0.10440513789653778, "signal/frontier_coverage_20/group_std_mean": 0.13062019646167755, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023217468336224557, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014929935336112977, "signal/frontier_coverage_25/centered_abs_mean": 0.13932709842920304, "signal/frontier_coverage_25/group_std_mean": 0.1753379374742508, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030945189669728278, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001992377499118447, "signal/frontier_coverage_5/centered_abs_mean": 0.15076255798339844, "signal/frontier_coverage_5/group_std_mean": 0.1955201655626297, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03330030217766762, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021559046115726234, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32999433279037477, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39727323651313784, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5139553189277649, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03299943394958973, "step": 565 }, { "calibration/aurc": 0.09963432050211549, "calibration/batch_distribution_entropy": 0.980695428798073, "calibration/buffer_distribution_entropy": 0.980368388300457, "calibration/confidence_entropy": 0.471962507140043, "calibration/coverage@0%": 0.1962892479198079, "calibration/coverage@1%": 0.30380368012509074, "calibration/coverage@10%": 0.6094211586251187, "calibration/coverage@15%": 0.706083951108505, "calibration/coverage@20%": 0.7958589867091082, "calibration/coverage@25%": 0.8836823588540794, "calibration/coverage@30%": 0.9326827497626626, "calibration/coverage@5%": 0.48807606662199143, "calibration/ece": 0.16809385986656594, "calibration/mean_confidence": 0.5347295725900152, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008767361111111116, "completions/max_length": 3635.4, "completions/max_terminated_length": 3635.4, "completions/mean_length": 893.3748413085938, "completions/mean_terminated_length": 901.4503173828125, "completions/min_length": 0.0, "completions/min_terminated_length": 179.0, "epoch": 1.3695953800577492, "grad_norm": 0.0024552466347813606, "learning_rate": 2.8245192307692307e-06, "loss": -0.0139, "num_tokens": 1473233363.0, "reward": 1.006178867816925, "reward_std": 0.11279452443122864, "rewards/accuracy_reward": 0.7065972328186035, "rewards/brier_reward": 0.8198230504989624, "rewards/confidence_uniqueness_reward": 0.942148756980896, "rewards/format_reward": 0.9907986044883728, "rewards/frontier_coverage_0": 0.026942870020866393, "rewards/frontier_coverage_1": 0.026963303238153456, "rewards/frontier_coverage_10": 0.050035931169986725, "rewards/frontier_coverage_15": 0.10418967604637146, "rewards/frontier_coverage_20": 0.17468074858188629, "rewards/frontier_coverage_25": 0.253691965341568, "rewards/frontier_coverage_5": 0.027084483951330184, "rewards/frontier_entropy_batch_reward": -0.282056000828743, "signal/accuracy_reward/centered_abs_mean": 0.12428385615348816, "signal/accuracy_reward/group_std_mean": 0.1653681844472885, "signal/accuracy_reward/group_zero_std_frac": 0.5222222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9815640449523926, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06214192807674408, "signal/advantage_abs_mean": 0.7617440342903137, "signal/advantage_pre_scale_abs_mean": 0.08372949510812759, "signal/advantage_pre_scale_std": 0.14188626110553743, "signal/advantage_std": 0.9829033493995667, "signal/brier_reward/centered_abs_mean": 0.11303541958332061, "signal/brier_reward/group_std_mean": 0.148781681060791, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17894803285598754, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011303541995584965, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02599692568182945, "signal/confidence_uniqueness_reward/group_std_mean": 0.042301306128501893, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041353125125169754, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002599692577496171, "signal/format_reward/centered_abs_mean": 0.01422526054084301, "signal/format_reward/group_std_mean": 0.0282505813986063, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11385444700717925, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007112630270421505, "signal/frontier_coverage_0/centered_abs_mean": 0.16342126429080964, "signal/frontier_coverage_0/group_std_mean": 0.2136039435863495, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03702799454331398, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002336924057453871, "signal/frontier_coverage_1/centered_abs_mean": 0.1633344203233719, "signal/frontier_coverage_1/group_std_mean": 0.21349417567253112, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03700846284627914, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023356821853667497, "signal/frontier_coverage_10/centered_abs_mean": 0.062467949092388154, "signal/frontier_coverage_10/group_std_mean": 0.07969342619180679, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014144686982035637, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008932916796766221, "signal/frontier_coverage_15/centered_abs_mean": 0.07396685630083084, "signal/frontier_coverage_15/group_std_mean": 0.09205316007137299, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01673112381249666, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010577260982245207, "signal/frontier_coverage_20/centered_abs_mean": 0.10074280351400375, "signal/frontier_coverage_20/group_std_mean": 0.12560753375291825, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02277975045144558, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014406220987439155, "signal/frontier_coverage_25/centered_abs_mean": 0.1342226967215538, "signal/frontier_coverage_25/group_std_mean": 0.16809515953063964, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03034891076385975, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019193845801055431, "signal/frontier_coverage_5/centered_abs_mean": 0.16212076246738433, "signal/frontier_coverage_5/group_std_mean": 0.21197022199630738, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03673520609736443, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023183269426226617, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3281023442745209, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39455527663230894, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5189752340316772, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03281023427844047, "step": 570 }, { "calibration/aurc": 0.11028525101347983, "calibration/batch_distribution_entropy": 0.9566430480940576, "calibration/buffer_distribution_entropy": 0.9804901972981563, "calibration/confidence_entropy": 0.47551363461978974, "calibration/coverage@0%": 0.11289720219401884, "calibration/coverage@1%": 0.20973930745717673, "calibration/coverage@10%": 0.6089289139266566, "calibration/coverage@15%": 0.6945457885198006, "calibration/coverage@20%": 0.7950429915606329, "calibration/coverage@25%": 0.852454105806849, "calibration/coverage@30%": 0.897395771365383, "calibration/coverage@5%": 0.44135904537458914, "calibration/ece": 0.1606364850593023, "calibration/mean_confidence": 0.5495700747544723, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013020833333333325, "completions/max_length": 3925.6, "completions/max_terminated_length": 3925.6, "completions/mean_length": 1019.6723388671875, "completions/mean_terminated_length": 1033.4481079101563, "completions/min_length": 0.0, "completions/min_terminated_length": 215.8, "epoch": 1.3815952300596241, "grad_norm": 0.002250150078907609, "learning_rate": 2.7944711538461537e-06, "loss": -0.029, "num_tokens": 1488053364.0, "reward": 1.0047349095344544, "reward_std": 0.12467042356729507, "rewards/accuracy_reward": 0.7088541626930237, "rewards/brier_reward": 0.8231329202651978, "rewards/confidence_uniqueness_reward": 0.9376375079154968, "rewards/format_reward": 0.9868923544883728, "rewards/frontier_coverage_0": 0.02870071791112423, "rewards/frontier_coverage_1": 0.028726204484701156, "rewards/frontier_coverage_10": 0.05221306309103966, "rewards/frontier_coverage_15": 0.10783710926771164, "rewards/frontier_coverage_20": 0.17996051013469697, "rewards/frontier_coverage_25": 0.25985406041145326, "rewards/frontier_coverage_5": 0.02900756411254406, "rewards/frontier_entropy_batch_reward": -0.29029480218887327, "signal/accuracy_reward/centered_abs_mean": 0.12949218600988388, "signal/accuracy_reward/group_std_mean": 0.1760840207338333, "signal/accuracy_reward/group_zero_std_frac": 0.48888890743255614, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9506262302398681, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06474609300494194, "signal/advantage_abs_mean": 0.7400188684463501, "signal/advantage_pre_scale_abs_mean": 0.08855971843004226, "signal/advantage_pre_scale_std": 0.15420872271060942, "signal/advantage_std": 0.9829932928085328, "signal/brier_reward/centered_abs_mean": 0.11222950965166092, "signal/brier_reward/group_std_mean": 0.15040762722492218, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16671662628650666, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011222951300442218, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03410551249980927, "signal/confidence_uniqueness_reward/group_std_mean": 0.05924170911312103, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05008783340454102, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0034105512779206036, "signal/format_reward/centered_abs_mean": 0.023562283255159855, "signal/format_reward/group_std_mean": 0.04704947955906391, "signal/format_reward/group_zero_std_frac": 0.8, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1705361783504486, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011781141627579927, "signal/frontier_coverage_0/centered_abs_mean": 0.16117251217365264, "signal/frontier_coverage_0/group_std_mean": 0.211995929479599, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034183626621961595, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023047670256346464, "signal/frontier_coverage_1/centered_abs_mean": 0.16109153926372527, "signal/frontier_coverage_1/group_std_mean": 0.2118909776210785, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03416657708585262, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002303608926013112, "signal/frontier_coverage_10/centered_abs_mean": 0.06008915230631828, "signal/frontier_coverage_10/group_std_mean": 0.07626638561487198, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012836772203445434, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008592748898081481, "signal/frontier_coverage_15/centered_abs_mean": 0.07080269902944565, "signal/frontier_coverage_15/group_std_mean": 0.08868333101272582, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015173521265387535, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010124785592779517, "signal/frontier_coverage_20/centered_abs_mean": 0.09740875363349914, "signal/frontier_coverage_20/group_std_mean": 0.12245101034641266, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020854856446385385, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001392945135012269, "signal/frontier_coverage_25/centered_abs_mean": 0.13047325909137725, "signal/frontier_coverage_25/group_std_mean": 0.16472874879837035, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027888312563300134, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018657675478607415, "signal/frontier_coverage_5/centered_abs_mean": 0.16004208326339722, "signal/frontier_coverage_5/group_std_mean": 0.2105341762304306, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03394476734101772, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002288601826876402, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3203289210796356, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3883971631526947, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4795925676822662, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032032891362905505, "step": 575 }, { "calibration/aurc": 0.09345142525102028, "calibration/batch_distribution_entropy": 0.9630027341939942, "calibration/buffer_distribution_entropy": 0.9820467419308796, "calibration/confidence_entropy": 0.48253184391156057, "calibration/coverage@0%": 0.15545568829074058, "calibration/coverage@1%": 0.23777337060842285, "calibration/coverage@10%": 0.6805700522287482, "calibration/coverage@15%": 0.7847976560103878, "calibration/coverage@20%": 0.8642587539374403, "calibration/coverage@25%": 0.902133177056626, "calibration/coverage@30%": 0.9395721925133689, "calibration/coverage@5%": 0.5479745475143784, "calibration/ece": 0.21899418926214792, "calibration/mean_confidence": 0.5576070825955319, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.043315972222222235, "completions/max_length": 3999.4, "completions/max_terminated_length": 3999.4, "completions/mean_length": 1091.949755859375, "completions/mean_terminated_length": 1141.4026123046874, "completions/min_length": 0.0, "completions/min_terminated_length": 211.2, "epoch": 1.3935950800614991, "grad_norm": 0.002100614598020911, "learning_rate": 2.7644230769230775e-06, "loss": -0.114, "num_tokens": 1503724433.0, "reward": 0.9761302590370178, "reward_std": 0.1780338317155838, "rewards/accuracy_reward": 0.7019097208976746, "rewards/brier_reward": 0.7873495817184448, "rewards/confidence_uniqueness_reward": 0.9076419115066529, "rewards/format_reward": 0.9567708492279052, "rewards/frontier_coverage_0": 0.00892861601896584, "rewards/frontier_coverage_1": 0.00892244540154934, "rewards/frontier_coverage_10": 0.047091028094291686, "rewards/frontier_coverage_15": 0.10459020435810089, "rewards/frontier_coverage_20": 0.17623171508312224, "rewards/frontier_coverage_25": 0.25542102158069613, "rewards/frontier_coverage_5": 0.009725382318720222, "rewards/frontier_entropy_batch_reward": -0.31445170640945436, "signal/accuracy_reward/centered_abs_mean": 0.15259331464767456, "signal/accuracy_reward/group_std_mean": 0.2111268609762192, "signal/accuracy_reward/group_zero_std_frac": 0.3638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8467071652412415, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07629665732383728, "signal/advantage_abs_mean": 0.710084867477417, "signal/advantage_pre_scale_abs_mean": 0.12264825254678727, "signal/advantage_pre_scale_std": 0.21392209231853485, "signal/advantage_std": 0.9833568692207336, "signal/brier_reward/centered_abs_mean": 0.14704960882663726, "signal/brier_reward/group_std_mean": 0.19571054577827454, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16416477262973786, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014704960770905018, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07570276707410813, "signal/confidence_uniqueness_reward/group_std_mean": 0.1249046117067337, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08455845564603806, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007570276688784361, "signal/format_reward/centered_abs_mean": 0.0701388880610466, "signal/format_reward/group_std_mean": 0.1196043387055397, "signal/format_reward/group_zero_std_frac": 0.5527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.39078201055526735, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0350694440305233, "signal/frontier_coverage_0/centered_abs_mean": 0.17277185022830963, "signal/frontier_coverage_0/group_std_mean": 0.2236211121082306, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.027412646636366843, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024706375785171985, "signal/frontier_coverage_1/centered_abs_mean": 0.17267104387283325, "signal/frontier_coverage_1/group_std_mean": 0.22349391877651215, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02739631161093712, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024691958911716937, "signal/frontier_coverage_10/centered_abs_mean": 0.06385519728064537, "signal/frontier_coverage_10/group_std_mean": 0.08039165139198304, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.010238087736070156, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009131293511018157, "signal/frontier_coverage_15/centered_abs_mean": 0.07462313622236252, "signal/frontier_coverage_15/group_std_mean": 0.0929687038064003, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01205196175724268, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010671108029782772, "signal/frontier_coverage_20/centered_abs_mean": 0.10192661881446838, "signal/frontier_coverage_20/group_std_mean": 0.12763854265213012, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.016460489854216574, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014575506327673792, "signal/frontier_coverage_25/centered_abs_mean": 0.1360788583755493, "signal/frontier_coverage_25/group_std_mean": 0.17146177887916564, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021944852918386458, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019459277391433716, "signal/frontier_coverage_5/centered_abs_mean": 0.17097091376781465, "signal/frontier_coverage_5/group_std_mean": 0.2213510900735855, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.027126950025558472, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002444884181022644, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34023687839508054, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4068700850009918, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3833127558231354, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03402368724346161, "step": 580 }, { "calibration/aurc": 0.17046566121396417, "calibration/batch_distribution_entropy": 0.9610891477230847, "calibration/buffer_distribution_entropy": 0.9820212064514543, "calibration/confidence_entropy": 0.49697460263110704, "calibration/coverage@0%": 0.11687982616119916, "calibration/coverage@1%": 0.17290076856957615, "calibration/coverage@10%": 0.3596662907420923, "calibration/coverage@15%": 0.46583535249434255, "calibration/coverage@20%": 0.6390530315041969, "calibration/coverage@25%": 0.7664757895868467, "calibration/coverage@30%": 0.8486663495586914, "calibration/coverage@5%": 0.27098377467777857, "calibration/ece": 0.13652749098142433, "calibration/mean_confidence": 0.5919820919460504, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.028993055555555557, "completions/max_length": 3882.8, "completions/max_terminated_length": 3882.8, "completions/mean_length": 1286.2006103515625, "completions/mean_terminated_length": 1324.7329833984375, "completions/min_length": 0.0, "completions/min_terminated_length": 208.6, "epoch": 1.405594930063374, "grad_norm": 0.0021445208694785833, "learning_rate": 2.7343750000000004e-06, "loss": -0.0811, "num_tokens": 1521667384.0, "reward": 0.9691243171691895, "reward_std": 0.16067952960729598, "rewards/accuracy_reward": 0.6624131798744202, "rewards/brier_reward": 0.8069814324378968, "rewards/confidence_uniqueness_reward": 0.9217924475669861, "rewards/format_reward": 0.9710069298744202, "rewards/frontier_coverage_0": 0.039838623628020285, "rewards/frontier_coverage_1": 0.03982721939682961, "rewards/frontier_coverage_10": 0.04834746643900871, "rewards/frontier_coverage_15": 0.09708862453699112, "rewards/frontier_coverage_20": 0.16204800009727477, "rewards/frontier_coverage_25": 0.23406701982021333, "rewards/frontier_coverage_5": 0.040019629150629045, "rewards/frontier_entropy_batch_reward": -0.2991880297660828, "signal/accuracy_reward/centered_abs_mean": 0.15218641459941865, "signal/accuracy_reward/group_std_mean": 0.20358789563179017, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9086845397949219, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07609320729970932, "signal/advantage_abs_mean": 0.7256081700325012, "signal/advantage_pre_scale_abs_mean": 0.11406800299882888, "signal/advantage_pre_scale_std": 0.1945664405822754, "signal/advantage_std": 0.9832754135131836, "signal/brier_reward/centered_abs_mean": 0.12671963274478912, "signal/brier_reward/group_std_mean": 0.17179930806159974, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1513580173254013, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012671963125467301, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05516675487160683, "signal/confidence_uniqueness_reward/group_std_mean": 0.09555203318595887, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06452755033969879, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005516675394028425, "signal/format_reward/centered_abs_mean": 0.04739583320915699, "signal/format_reward/group_std_mean": 0.08717522174119949, "signal/format_reward/group_zero_std_frac": 0.65, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2736491531133652, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.023697916604578494, "signal/frontier_coverage_0/centered_abs_mean": 0.15436613261699678, "signal/frontier_coverage_0/group_std_mean": 0.20032262206077575, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02646334134042263, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022074357373639943, "signal/frontier_coverage_1/centered_abs_mean": 0.15427806973457336, "signal/frontier_coverage_1/group_std_mean": 0.2002100557088852, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02644813396036625, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022061764262616634, "signal/frontier_coverage_10/centered_abs_mean": 0.05655251294374466, "signal/frontier_coverage_10/group_std_mean": 0.07212142795324325, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0097461000084877, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008087009191513062, "signal/frontier_coverage_15/centered_abs_mean": 0.07178578078746796, "signal/frontier_coverage_15/group_std_mean": 0.09074690490961075, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.012421418353915215, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010265366523526608, "signal/frontier_coverage_20/centered_abs_mean": 0.1017922267317772, "signal/frontier_coverage_20/group_std_mean": 0.12880659401416777, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017606715485453606, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001455628895200789, "signal/frontier_coverage_25/centered_abs_mean": 0.13876647651195526, "signal/frontier_coverage_25/group_std_mean": 0.175778591632843, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023976121470332144, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001984360720962286, "signal/frontier_coverage_5/centered_abs_mean": 0.15271045863628388, "signal/frontier_coverage_5/group_std_mean": 0.19827630817890168, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02617722600698471, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021837596548721196, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33504435420036316, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4021625995635986, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4056344449520111, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033504435792565346, "step": 585 }, { "calibration/aurc": 0.13363501840101474, "calibration/batch_distribution_entropy": 0.927256228618109, "calibration/buffer_distribution_entropy": 0.9812668042647432, "calibration/confidence_entropy": 0.488665506297988, "calibration/coverage@0%": 0.22362701483296807, "calibration/coverage@1%": 0.2689885986026016, "calibration/coverage@10%": 0.43250913802016655, "calibration/coverage@15%": 0.6891102145292469, "calibration/coverage@20%": 0.7362514182042972, "calibration/coverage@25%": 0.7613508744799712, "calibration/coverage@30%": 0.9244356505657093, "calibration/coverage@5%": 0.3492814081365009, "calibration/ece": 0.15907323105091412, "calibration/mean_confidence": 0.6248761445497155, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004600694444444442, "completions/max_length": 3681.4, "completions/max_terminated_length": 3681.4, "completions/mean_length": 1218.0521728515625, "completions/mean_terminated_length": 1223.7986083984374, "completions/min_length": 0.0, "completions/min_terminated_length": 205.4, "epoch": 1.417594780065249, "grad_norm": 0.0023973125498741865, "learning_rate": 2.7043269230769233e-06, "loss": -0.0166, "num_tokens": 1538822801.0, "reward": 0.9975666046142578, "reward_std": 0.10440057963132858, "rewards/accuracy_reward": 0.68828125, "rewards/brier_reward": 0.8343516826629639, "rewards/confidence_uniqueness_reward": 0.9442711710929871, "rewards/format_reward": 0.9953992962837219, "rewards/frontier_coverage_0": 0.04567217640578747, "rewards/frontier_coverage_1": 0.04569807052612305, "rewards/frontier_coverage_10": 0.05116933360695839, "rewards/frontier_coverage_15": 0.10133900344371796, "rewards/frontier_coverage_20": 0.1687961131334305, "rewards/frontier_coverage_25": 0.2452457994222641, "rewards/frontier_coverage_5": 0.04587310701608658, "rewards/frontier_entropy_batch_reward": -0.32200189828872683, "signal/accuracy_reward/centered_abs_mean": 0.11290690153837205, "signal/accuracy_reward/group_std_mean": 0.15099144130945205, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.925674319267273, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05645345076918602, "signal/advantage_abs_mean": 0.7637529969215393, "signal/advantage_pre_scale_abs_mean": 0.07765910625457764, "signal/advantage_pre_scale_std": 0.12909533083438873, "signal/advantage_std": 0.9828407287597656, "signal/brier_reward/centered_abs_mean": 0.10314983427524567, "signal/brier_reward/group_std_mean": 0.136023673415184, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1698082685470581, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010314983315765858, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021420668810606003, "signal/confidence_uniqueness_reward/group_std_mean": 0.034321589022874834, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035426610708236696, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002142066927626729, "signal/format_reward/centered_abs_mean": 0.008393011963926255, "signal/format_reward/group_std_mean": 0.018513968773186208, "signal/format_reward/group_zero_std_frac": 0.9138888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06900344025343656, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004196505981963128, "signal/frontier_coverage_0/centered_abs_mean": 0.14370577037334442, "signal/frontier_coverage_0/group_std_mean": 0.18573465943336487, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0336445227265358, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020549925277009605, "signal/frontier_coverage_1/centered_abs_mean": 0.14363015294075013, "signal/frontier_coverage_1/group_std_mean": 0.18563660383224487, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033626696467399596, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002053911192342639, "signal/frontier_coverage_10/centered_abs_mean": 0.05578533932566643, "signal/frontier_coverage_10/group_std_mean": 0.07023323774337768, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01311029139906168, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007977303350344301, "signal/frontier_coverage_15/centered_abs_mean": 0.07076951265335082, "signal/frontier_coverage_15/group_std_mean": 0.08869308978319168, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01673793625086546, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001012004038784653, "signal/frontier_coverage_20/centered_abs_mean": 0.09809644967317581, "signal/frontier_coverage_20/group_std_mean": 0.12331822216510772, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02323850505053997, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014027792029082775, "signal/frontier_coverage_25/centered_abs_mean": 0.13125519305467606, "signal/frontier_coverage_25/group_std_mean": 0.1657370448112488, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03108687661588192, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018769492628052831, "signal/frontier_coverage_5/centered_abs_mean": 0.14228105694055557, "signal/frontier_coverage_5/group_std_mean": 0.18392003774642945, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033308600261807444, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002034619217738509, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33420050144195557, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3994639039039612, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5513591527938843, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03342005014419556, "step": 590 }, { "calibration/aurc": 0.07480890651703404, "calibration/batch_distribution_entropy": 0.9828328527315134, "calibration/buffer_distribution_entropy": 0.9817377142431003, "calibration/confidence_entropy": 0.4830405262105046, "calibration/coverage@0%": 0.23835041511170654, "calibration/coverage@1%": 0.317058802840166, "calibration/coverage@10%": 0.7133245133487348, "calibration/coverage@15%": 0.8062275869296771, "calibration/coverage@20%": 0.8965332861149463, "calibration/coverage@25%": 0.9566224215001435, "calibration/coverage@30%": 0.985378590078329, "calibration/coverage@5%": 0.5676621045958471, "calibration/ece": 0.21566845079895333, "calibration/mean_confidence": 0.5305456805763834, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0018229166666666962, "completions/max_length": 3201.4, "completions/max_terminated_length": 3201.4, "completions/mean_length": 1157.2978271484376, "completions/mean_terminated_length": 1159.3373779296876, "completions/min_length": 0.0, "completions/min_terminated_length": 174.2, "epoch": 1.429594630067124, "grad_norm": 0.00260770577006042, "learning_rate": 2.6742788461538467e-06, "loss": 0.0084, "num_tokens": 1555252088.0, "reward": 1.0056678652763367, "reward_std": 0.10079189985990525, "rewards/accuracy_reward": 0.6982638835906982, "rewards/brier_reward": 0.8137720704078675, "rewards/confidence_uniqueness_reward": 0.9492265462875367, "rewards/format_reward": 0.9981770873069763, "rewards/frontier_coverage_0": 0.026146640256047248, "rewards/frontier_coverage_1": 0.02614601030945778, "rewards/frontier_coverage_10": 0.0456491582095623, "rewards/frontier_coverage_15": 0.0906353935599327, "rewards/frontier_coverage_20": 0.15246600657701492, "rewards/frontier_coverage_25": 0.2238086700439453, "rewards/frontier_coverage_5": 0.026459738612174988, "rewards/frontier_entropy_batch_reward": -0.27308249771595, "signal/accuracy_reward/centered_abs_mean": 0.12478298544883729, "signal/accuracy_reward/group_std_mean": 0.1637239784002304, "signal/accuracy_reward/group_zero_std_frac": 0.5333333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.070089840888977, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06239149272441864, "signal/advantage_abs_mean": 0.7668791890144349, "signal/advantage_pre_scale_abs_mean": 0.07820883989334107, "signal/advantage_pre_scale_std": 0.12572188526391984, "signal/advantage_std": 0.982763123512268, "signal/brier_reward/centered_abs_mean": 0.10813791900873185, "signal/brier_reward/group_std_mean": 0.13981907367706298, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1857350766658783, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010813792422413825, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015965032763779162, "signal/confidence_uniqueness_reward/group_std_mean": 0.022816429287195204, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027523915842175485, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015965032856911422, "signal/format_reward/centered_abs_mean": 0.003271484305150807, "signal/format_reward/group_std_mean": 0.007144530490040779, "signal/format_reward/group_zero_std_frac": 0.9666666507720947, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.028176695853471757, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0016357421525754034, "signal/frontier_coverage_0/centered_abs_mean": 0.16511012017726898, "signal/frontier_coverage_0/group_std_mean": 0.21421845853328705, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0404993049800396, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023610747884958982, "signal/frontier_coverage_1/centered_abs_mean": 0.16507968604564666, "signal/frontier_coverage_1/group_std_mean": 0.21418104469776153, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0404917910695076, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023606396745890377, "signal/frontier_coverage_10/centered_abs_mean": 0.058770237118005754, "signal/frontier_coverage_10/group_std_mean": 0.0750571459531784, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014474144019186497, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008404143969528377, "signal/frontier_coverage_15/centered_abs_mean": 0.06682768538594246, "signal/frontier_coverage_15/group_std_mean": 0.08372382670640946, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016561147198081015, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009556358796544373, "signal/frontier_coverage_20/centered_abs_mean": 0.09060783386230468, "signal/frontier_coverage_20/group_std_mean": 0.11350671499967575, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022481374442577362, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012956920312717558, "signal/frontier_coverage_25/centered_abs_mean": 0.12240231782197952, "signal/frontier_coverage_25/group_std_mean": 0.15335234999656677, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030351197719573973, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017503531882539392, "signal/frontier_coverage_5/centered_abs_mean": 0.16379218697547912, "signal/frontier_coverage_5/group_std_mean": 0.2125529944896698, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04017730951309204, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023422284051775933, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3053570449352264, "signal/frontier_entropy_batch_reward/group_std_mean": 0.37301817536354065, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.526164972782135, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030535706505179405, "step": 595 }, { "calibration/aurc": 0.11776333779549893, "calibration/batch_distribution_entropy": 0.9177009565648232, "calibration/buffer_distribution_entropy": 0.9813989915403323, "calibration/confidence_entropy": 0.48154410169959566, "calibration/coverage@0%": 0.15052083333333333, "calibration/coverage@1%": 0.2572916666666667, "calibration/coverage@10%": 0.6625, "calibration/coverage@15%": 0.7395833333333333, "calibration/coverage@20%": 0.7776041666666667, "calibration/coverage@25%": 0.8338541666666668, "calibration/coverage@30%": 0.8661458333333332, "calibration/coverage@5%": 0.37395833333333334, "calibration/ece": 0.15964555208333336, "calibration/mean_confidence": 0.6478661145833333, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0007812500000000222, "completions/max_length": 3292.2, "completions/max_terminated_length": 3292.2, "completions/mean_length": 1060.505810546875, "completions/mean_terminated_length": 1061.3367553710937, "completions/min_length": 36.4, "completions/min_terminated_length": 216.2, "epoch": 1.441594480068999, "grad_norm": 0.0025479544419795275, "learning_rate": 2.6442307692307696e-06, "loss": 0.0073, "num_tokens": 1570563899.0, "reward": 1.020909571647644, "reward_std": 0.10144262760877609, "rewards/accuracy_reward": 0.729600703716278, "rewards/brier_reward": 0.8360188364982605, "rewards/confidence_uniqueness_reward": 0.9478083848953247, "rewards/format_reward": 0.99921875, "rewards/frontier_coverage_0": 0.0182599871623097, "rewards/frontier_coverage_1": 0.018277949932962657, "rewards/frontier_coverage_10": 0.05050070583820343, "rewards/frontier_coverage_15": 0.10800392180681229, "rewards/frontier_coverage_20": 0.18200758695602418, "rewards/frontier_coverage_25": 0.2649056166410446, "rewards/frontier_coverage_5": 0.018470912738121115, "rewards/frontier_entropy_batch_reward": -0.31326996684074404, "signal/accuracy_reward/centered_abs_mean": 0.12102321982383728, "signal/accuracy_reward/group_std_mean": 0.1607717901468277, "signal/accuracy_reward/group_zero_std_frac": 0.5416666686534881, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.993973171710968, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06051160991191864, "signal/advantage_abs_mean": 0.7755637049674988, "signal/advantage_pre_scale_abs_mean": 0.07838105112314224, "signal/advantage_pre_scale_std": 0.12489996254444122, "signal/advantage_std": 0.9828433275222779, "signal/brier_reward/centered_abs_mean": 0.10264720171689987, "signal/brier_reward/group_std_mean": 0.13262540996074676, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1685381680727005, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01026472058147192, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015869051963090897, "signal/confidence_uniqueness_reward/group_std_mean": 0.02228650264441967, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026085112243890762, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015869052149355412, "signal/format_reward/centered_abs_mean": 0.0015136718400754034, "signal/format_reward/group_std_mean": 0.0044194171205163, "signal/format_reward/group_zero_std_frac": 0.9749999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012289304099977017, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007568359200377017, "signal/frontier_coverage_0/centered_abs_mean": 0.1449378103017807, "signal/frontier_coverage_0/group_std_mean": 0.189239040017128, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03399923667311668, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020726106828078627, "signal/frontier_coverage_1/centered_abs_mean": 0.14487462043762206, "signal/frontier_coverage_1/group_std_mean": 0.18915866911411286, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033984321355819705, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020717070437967777, "signal/frontier_coverage_10/centered_abs_mean": 0.056587740778923035, "signal/frontier_coverage_10/group_std_mean": 0.07166957706212998, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013312225975096226, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008092047180980444, "signal/frontier_coverage_15/centered_abs_mean": 0.07792377918958664, "signal/frontier_coverage_15/group_std_mean": 0.09718545377254487, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01838395856320858, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001114309998229146, "signal/frontier_coverage_20/centered_abs_mean": 0.10968948751688004, "signal/frontier_coverage_20/group_std_mean": 0.13686617612838745, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02588742785155773, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015685596736148, "signal/frontier_coverage_25/centered_abs_mean": 0.14639344513416291, "signal/frontier_coverage_25/group_std_mean": 0.1832552284002304, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03454747945070267, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002093426208011806, "signal/frontier_coverage_5/centered_abs_mean": 0.1441301167011261, "signal/frontier_coverage_5/group_std_mean": 0.18819935619831085, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03380856290459633, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002061060653068125, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3321432411670685, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3983724594116211, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5467443525791168, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0332143247127533, "step": 600 }, { "epoch": 1.441594480068999, "eval_calibration/aurc": 0.11802945623865287, "eval_calibration/batch_distribution_entropy": 0.9366906879155904, "eval_calibration/buffer_distribution_entropy": 0.9812793383792132, "eval_calibration/confidence_entropy": 0.5005515326063407, "eval_calibration/coverage@0%": 0.2916666666666667, "eval_calibration/coverage@1%": 0.2916666666666667, "eval_calibration/coverage@10%": 0.6197916666666666, "eval_calibration/coverage@15%": 0.7239583333333334, "eval_calibration/coverage@20%": 0.796875, "eval_calibration/coverage@25%": 0.8854166666666666, "eval_calibration/coverage@30%": 0.984375, "eval_calibration/coverage@5%": 0.3802083333333333, "eval_calibration/ece": 0.18726786858974356, "eval_calibration/mean_confidence": 0.5804040064102564, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 2411.8333333333335, "eval_completions/max_terminated_length": 2411.8333333333335, "eval_completions/mean_length": 993.3123575846354, "eval_completions/mean_terminated_length": 993.3123575846354, "eval_completions/min_length": 241.83333333333334, "eval_completions/min_terminated_length": 241.83333333333334, "eval_loss": 0.0, "eval_num_tokens": 1570563899.0, "eval_reward": 0.9275561968485514, "eval_reward_std": 0.23124410212039948, "eval_rewards/accuracy_reward": 0.6875, "eval_rewards/brier_reward": 0.8379482428232828, "eval_rewards/confidence_uniqueness_reward": 0.8963758647441864, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.04966919838140408, "eval_rewards/frontier_coverage_1": 0.049654243824382625, "eval_rewards/frontier_coverage_10": 0.055118689934412636, "eval_rewards/frontier_coverage_15": 0.10419152304530144, "eval_rewards/frontier_coverage_20": 0.17086196939150491, "eval_rewards/frontier_coverage_25": 0.24627631157636642, "eval_rewards/frontier_coverage_5": 0.04966597332774351, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 131.1254, "eval_samples_per_second": 7.626, "eval_signal/accuracy_reward/centered_abs_mean": 0.4157986144224803, "eval_signal/accuracy_reward/group_std_mean": 0.46200739840666455, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9027682542800903, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20789930721124014, "eval_signal/advantage_abs_mean": 0.8828665316104889, "eval_signal/advantage_pre_scale_abs_mean": 0.20522412161032358, "eval_signal/advantage_pre_scale_std": 0.22870965053637823, "eval_signal/advantage_std": 0.9863951603571574, "eval_signal/brier_reward/centered_abs_mean": 0.1551913395524025, "eval_signal/brier_reward/group_std_mean": 0.21014980723460516, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06728844096263249, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.015519133924196163, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.041531032572189965, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04975000210106373, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.018028488382697105, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0041531034124394255, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2648828824361165, "eval_signal/frontier_coverage_0/group_std_mean": 0.3670547952254613, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01645986953129371, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037878251556927958, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.26476379483938217, "eval_signal/frontier_coverage_1/group_std_mean": 0.36690954864025116, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016452479176223278, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037861222323651114, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.07583417370915413, "eval_signal/frontier_coverage_10/group_std_mean": 0.09882033616304398, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004711338396494587, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010844287074481447, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.11589070161183675, "eval_signal/frontier_coverage_15/group_std_mean": 0.1468774676322937, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0072002453574289875, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016572370271508892, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.197922649482886, "eval_signal/frontier_coverage_20/group_std_mean": 0.24237419913212457, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01229737838730216, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028302938444539905, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.28636286159356433, "eval_signal/frontier_coverage_25/group_std_mean": 0.34704581399758655, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.017790169765551884, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00409498888378342, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2635475993156433, "eval_signal/frontier_coverage_5/group_std_mean": 0.36541228493054706, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016376903591056664, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037687306369965277, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.046, "step": 600 }, { "epoch": 1.441594480068999, "step": 600, "train_probe_calibration/aurc": 0.11675127139560688, "train_probe_calibration/batch_distribution_entropy": 0.9230508777439455, "train_probe_calibration/buffer_distribution_entropy": 0.9812899727047678, "train_probe_calibration/confidence_entropy": 0.48371354175712417, "train_probe_calibration/coverage@0%": 0.34375, "train_probe_calibration/coverage@1%": 0.34375, "train_probe_calibration/coverage@10%": 0.515625, "train_probe_calibration/coverage@15%": 0.7083333333333334, "train_probe_calibration/coverage@20%": 0.8385416666666666, "train_probe_calibration/coverage@25%": 0.9739583333333334, "train_probe_calibration/coverage@30%": 0.9947916666666666, "train_probe_calibration/coverage@5%": 0.3489583333333333, "train_probe_calibration/ece": 0.21593437499999998, "train_probe_calibration/mean_confidence": 0.5957906249999999, "train_probe_completions/clipped_ratio": 0.0008680555555555617, "train_probe_completions/max_length": 2463.8333333333335, "train_probe_completions/max_terminated_length": 2463.8333333333335, "train_probe_completions/mean_length": 957.962880452474, "train_probe_completions/mean_terminated_length": 958.8002115885416, "train_probe_completions/min_length": 142.83333333333334, "train_probe_completions/min_terminated_length": 171.83333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 1570563899.0, "train_probe_reward": 0.9545861085255941, "train_probe_reward_std": 0.21626246720552444, "train_probe_rewards/accuracy_reward": 0.7447916766007742, "train_probe_rewards/brier_reward": 0.8374614318211874, "train_probe_rewards/confidence_uniqueness_reward": 0.89298415184021, "train_probe_rewards/format_reward": 0.9991319477558136, "train_probe_rewards/frontier_coverage_0": 0.010258166119456291, "train_probe_rewards/frontier_coverage_1": 0.010285623526821533, "train_probe_rewards/frontier_coverage_10": 0.05134387003878752, "train_probe_rewards/frontier_coverage_15": 0.11345388740301132, "train_probe_rewards/frontier_coverage_20": 0.19093378633260727, "train_probe_rewards/frontier_coverage_25": 0.2771032725771268, "train_probe_rewards/frontier_coverage_5": 0.010462871704172963, "train_probe_rewards/frontier_entropy_batch_reward": -0.9991319477558136, "train_probe_runtime": 143.1787, "train_probe_samples_per_second": 6.984, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3704427083333333, "train_probe_signal/accuracy_reward/group_std_mean": 0.43535151580969494, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8639881908893585, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18522135416666666, "train_probe_signal/advantage_abs_mean": 0.8328354756037394, "train_probe_signal/advantage_pre_scale_abs_mean": 0.18153581271568933, "train_probe_signal/advantage_pre_scale_std": 0.21439906706412634, "train_probe_signal/advantage_std": 0.9863629341125488, "train_probe_signal/brier_reward/centered_abs_mean": 0.15394766877094904, "train_probe_signal/brier_reward/group_std_mean": 0.20854342232147852, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07198699191212654, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.015394768056770166, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04392562434077263, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.054374140997727714, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020567491340140503, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004392562434077263, "train_probe_signal/format_reward/centered_abs_mean": 0.0016818575871487458, "train_probe_signal/format_reward/group_std_mean": 0.0049104637776811915, "train_probe_signal/format_reward/group_zero_std_frac": 0.9722222288449606, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0037552444264292717, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0008409287935743729, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.25623046855131787, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.36612696945667267, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.017111041583120823, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036640956920261183, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.256125142176946, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.36598806579907733, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.017104018479585648, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036625893941769996, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07513122757275899, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.09937256947159767, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005020403225595753, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010743765354466934, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11616303771734238, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.14261490354935327, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007758967267970244, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016611314301068585, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1925310716032982, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.23078140864769617, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012859225738793612, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002753194266309341, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.27361434201399487, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.32750125726064044, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01827398408204317, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003912684895719091, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.25470831741889316, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3641922523578008, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.017009020938227575, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003642329053642849, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0016818575871487458, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0049104637776811915, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9722222288449606, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007510488697638115, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0001681857587148746, "train_probe_steps_per_second": 0.042 }, { "calibration/aurc": 0.09558923489699658, "calibration/batch_distribution_entropy": 0.9584420972144472, "calibration/buffer_distribution_entropy": 0.9811046622030893, "calibration/confidence_entropy": 0.48270719378053056, "calibration/coverage@0%": 0.0750751573603887, "calibration/coverage@1%": 0.1750751573603887, "calibration/coverage@10%": 0.6530113270354913, "calibration/coverage@15%": 0.7547184024279928, "calibration/coverage@20%": 0.8532883031633121, "calibration/coverage@25%": 0.9320883381132516, "calibration/coverage@30%": 0.9801646673245479, "calibration/coverage@5%": 0.43802802890908216, "calibration/ece": 0.14332990975185758, "calibration/mean_confidence": 0.6008687151115122, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0018229166666666962, "completions/max_length": 3653.0, "completions/max_terminated_length": 3653.0, "completions/mean_length": 943.5162231445313, "completions/mean_terminated_length": 945.2334350585937, "completions/min_length": 0.0, "completions/min_terminated_length": 120.2, "epoch": 1.453594330070874, "grad_norm": 0.002810286357998848, "learning_rate": 2.6141826923076926e-06, "loss": 0.0089, "num_tokens": 1584531990.0, "reward": 1.0092534184455872, "reward_std": 0.10346733331680298, "rewards/accuracy_reward": 0.7048611044883728, "rewards/brier_reward": 0.8292035460472107, "rewards/confidence_uniqueness_reward": 0.9482634544372559, "rewards/format_reward": 0.9981770753860474, "rewards/frontier_coverage_0": 0.02842591591179371, "rewards/frontier_coverage_1": 0.028446278348565102, "rewards/frontier_coverage_10": 0.04861754775047302, "rewards/frontier_coverage_15": 0.09893043637275696, "rewards/frontier_coverage_20": 0.16540935039520263, "rewards/frontier_coverage_25": 0.2422512799501419, "rewards/frontier_coverage_5": 0.028660116344690324, "rewards/frontier_entropy_batch_reward": -0.2917499512434006, "signal/accuracy_reward/centered_abs_mean": 0.12080078125, "signal/accuracy_reward/group_std_mean": 0.1602880299091339, "signal/accuracy_reward/group_zero_std_frac": 0.5361111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9658271431922912, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.060400390625, "signal/advantage_abs_mean": 0.7683446645736695, "signal/advantage_pre_scale_abs_mean": 0.07905568778514863, "signal/advantage_pre_scale_std": 0.12629517465829848, "signal/advantage_std": 0.9828693866729736, "signal/brier_reward/centered_abs_mean": 0.1080152839422226, "signal/brier_reward/group_std_mean": 0.14062872529029846, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17367709279060364, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010801529139280319, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016668078303337098, "signal/confidence_uniqueness_reward/group_std_mean": 0.024647758901119234, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02698330543935299, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016668078489601613, "signal/format_reward/centered_abs_mean": 0.003390841977670789, "signal/format_reward/group_std_mean": 0.00823095440864563, "signal/format_reward/group_zero_std_frac": 0.9583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.026670993864536287, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0016954209888353944, "signal/frontier_coverage_0/centered_abs_mean": 0.15668127536773682, "signal/frontier_coverage_0/group_std_mean": 0.20213212072849274, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035876476764678956, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022405422292649745, "signal/frontier_coverage_1/centered_abs_mean": 0.15663088858127594, "signal/frontier_coverage_1/group_std_mean": 0.202065372467041, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03586488664150238, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022398216184228658, "signal/frontier_coverage_10/centered_abs_mean": 0.05823779553174972, "signal/frontier_coverage_10/group_std_mean": 0.07387124150991439, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013428068906068801, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008328004740178585, "signal/frontier_coverage_15/centered_abs_mean": 0.07526019364595413, "signal/frontier_coverage_15/group_std_mean": 0.09428713768720627, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017477550357580186, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010762207210063934, "signal/frontier_coverage_20/centered_abs_mean": 0.1047041043639183, "signal/frontier_coverage_20/group_std_mean": 0.13193972706794738, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024335138872265814, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014972686767578125, "signal/frontier_coverage_25/centered_abs_mean": 0.14114450812339782, "signal/frontier_coverage_25/group_std_mean": 0.17838573157787324, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.032783514633774755, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020183662883937357, "signal/frontier_coverage_5/centered_abs_mean": 0.15568934231996537, "signal/frontier_coverage_5/group_std_mean": 0.20089252889156342, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03564789295196533, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022263576043769716, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3193602502346039, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3889504611492157, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5186540305614471, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03193602599203586, "step": 605 }, { "calibration/aurc": 0.16828026211478242, "calibration/batch_distribution_entropy": 0.9673414895621573, "calibration/buffer_distribution_entropy": 0.9803350494367313, "calibration/confidence_entropy": 0.4826084238676106, "calibration/coverage@0%": 0.09375000000000001, "calibration/coverage@1%": 0.10833333333333332, "calibration/coverage@10%": 0.4005208333333333, "calibration/coverage@15%": 0.5015625, "calibration/coverage@20%": 0.5817708333333333, "calibration/coverage@25%": 0.7057291666666666, "calibration/coverage@30%": 0.8677083333333332, "calibration/coverage@5%": 0.31614583333333335, "calibration/ece": 0.17711151736111114, "calibration/mean_confidence": 0.5755484201388888, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005208333333333482, "completions/max_length": 2950.2, "completions/max_terminated_length": 2950.2, "completions/mean_length": 807.0048583984375, "completions/mean_terminated_length": 807.4189697265625, "completions/min_length": 15.4, "completions/min_terminated_length": 88.6, "epoch": 1.465594180072749, "grad_norm": 0.0028614369221031666, "learning_rate": 2.584134615384616e-06, "loss": 0.0052, "num_tokens": 1596957678.0, "reward": 1.0093258619308472, "reward_std": 0.10231070816516877, "rewards/accuracy_reward": 0.7103298664093017, "rewards/brier_reward": 0.8057976961135864, "rewards/confidence_uniqueness_reward": 0.9493979334831237, "rewards/format_reward": 0.9985243082046509, "rewards/frontier_coverage_0": 0.003922509960830212, "rewards/frontier_coverage_1": 0.003951227106153965, "rewards/frontier_coverage_10": 0.04210040867328644, "rewards/frontier_coverage_15": 0.09112356156110764, "rewards/frontier_coverage_20": 0.1539991855621338, "rewards/frontier_coverage_25": 0.22835943698883057, "rewards/frontier_coverage_5": 0.004323094466235489, "rewards/frontier_entropy_batch_reward": -0.28168027102947235, "signal/accuracy_reward/centered_abs_mean": 0.12170681655406952, "signal/accuracy_reward/group_std_mean": 0.16457101106643676, "signal/accuracy_reward/group_zero_std_frac": 0.5194444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9728019595146179, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06085340827703476, "signal/advantage_abs_mean": 0.7609669446945191, "signal/advantage_pre_scale_abs_mean": 0.07733545005321503, "signal/advantage_pre_scale_std": 0.12450267225503922, "signal/advantage_std": 0.9828919887542724, "signal/brier_reward/centered_abs_mean": 0.11052588373422623, "signal/brier_reward/group_std_mean": 0.14374896585941316, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17655435502529143, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011052588745951653, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015836169011890888, "signal/confidence_uniqueness_reward/group_std_mean": 0.022552402690052986, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02531985826790333, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015836169477552176, "signal/format_reward/centered_abs_mean": 0.002718098950572312, "signal/format_reward/group_std_mean": 0.006266768835484981, "signal/format_reward/group_zero_std_frac": 0.9694444298744201, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02169901877641678, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001359049475286156, "signal/frontier_coverage_0/centered_abs_mean": 0.15925846099853516, "signal/frontier_coverage_0/group_std_mean": 0.2087152421474457, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0364031545817852, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022773958742618563, "signal/frontier_coverage_1/centered_abs_mean": 0.15921551287174224, "signal/frontier_coverage_1/group_std_mean": 0.2086589068174362, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0363933652639389, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002276781853288412, "signal/frontier_coverage_10/centered_abs_mean": 0.0601221852004528, "signal/frontier_coverage_10/group_std_mean": 0.0764574259519577, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013739870116114616, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008597472333349288, "signal/frontier_coverage_15/centered_abs_mean": 0.07345416396856308, "signal/frontier_coverage_15/group_std_mean": 0.09185772836208343, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016773372143507003, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010503945406526328, "signal/frontier_coverage_20/centered_abs_mean": 0.10081402510404587, "signal/frontier_coverage_20/group_std_mean": 0.1262542188167572, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023017995804548264, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014416405465453863, "signal/frontier_coverage_25/centered_abs_mean": 0.13552749156951904, "signal/frontier_coverage_25/group_std_mean": 0.17060936391353607, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03094673380255699, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019380431389436127, "signal/frontier_coverage_5/centered_abs_mean": 0.15849049389362335, "signal/frontier_coverage_5/group_std_mean": 0.20773231983184814, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03622789680957794, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022664140444248913, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31973678469657896, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38735169768333433, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5107354879379272, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03197368010878563, "step": 610 }, { "calibration/aurc": 0.17277663233935256, "calibration/batch_distribution_entropy": 0.9765311373401812, "calibration/buffer_distribution_entropy": 0.9808250588163799, "calibration/confidence_entropy": 0.48948248943083283, "calibration/coverage@0%": 0.005729166666666666, "calibration/coverage@1%": 0.005729166666666666, "calibration/coverage@10%": 0.2145833333333333, "calibration/coverage@15%": 0.5979166666666667, "calibration/coverage@20%": 0.7630208333333333, "calibration/coverage@25%": 0.8369791666666668, "calibration/coverage@30%": 0.9041666666666666, "calibration/coverage@5%": 0.005729166666666666, "calibration/ece": 0.1787409583333333, "calibration/mean_confidence": 0.5740234166666667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00017361111111111605, "completions/max_length": 2826.0, "completions/max_terminated_length": 2826.0, "completions/mean_length": 790.60712890625, "completions/mean_terminated_length": 790.7543823242188, "completions/min_length": 98.6, "completions/min_terminated_length": 133.6, "epoch": 1.477594030074624, "grad_norm": 0.003204671898856759, "learning_rate": 2.554086538461539e-06, "loss": 0.0008, "num_tokens": 1609145056.0, "reward": 1.0107253670692444, "reward_std": 0.10107671320438386, "rewards/accuracy_reward": 0.7134548544883728, "rewards/brier_reward": 0.8200236558914185, "rewards/confidence_uniqueness_reward": 0.9490665197372437, "rewards/format_reward": 0.9998263835906982, "rewards/frontier_coverage_0": 0.016171068139374255, "rewards/frontier_coverage_1": 0.016171068139374255, "rewards/frontier_coverage_10": 0.044859865307807924, "rewards/frontier_coverage_15": 0.09566431641578674, "rewards/frontier_coverage_20": 0.16115307211875915, "rewards/frontier_coverage_25": 0.23886812329292298, "rewards/frontier_coverage_5": 0.01635436974465847, "rewards/frontier_entropy_batch_reward": -0.3125044822692871, "signal/accuracy_reward/centered_abs_mean": 0.12219509482383728, "signal/accuracy_reward/group_std_mean": 0.16446252465248107, "signal/accuracy_reward/group_zero_std_frac": 0.5138889074325561, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.986894679069519, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06109754741191864, "signal/advantage_abs_mean": 0.769283926486969, "signal/advantage_pre_scale_abs_mean": 0.07805669158697129, "signal/advantage_pre_scale_std": 0.1223902866244316, "signal/advantage_std": 0.9828672289848328, "signal/brier_reward/centered_abs_mean": 0.11475347429513931, "signal/brier_reward/group_std_mean": 0.14670601189136506, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1857527107000351, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011475348100066184, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014028819277882576, "signal/confidence_uniqueness_reward/group_std_mean": 0.017688148841261863, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02275208830833435, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014028819277882576, "signal/format_reward/centered_abs_mean": 0.0003255208255723119, "signal/format_reward/group_std_mean": 0.0006831518840044737, "signal/format_reward/group_zero_std_frac": 0.9972222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0025148998945951464, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00016276041278615594, "signal/frontier_coverage_0/centered_abs_mean": 0.15996686220169068, "signal/frontier_coverage_0/group_std_mean": 0.2047890156507492, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03700179383158684, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022875262191519143, "signal/frontier_coverage_1/centered_abs_mean": 0.15996686220169068, "signal/frontier_coverage_1/group_std_mean": 0.2047890156507492, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03700179383158684, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022875262191519143, "signal/frontier_coverage_10/centered_abs_mean": 0.061971521377563475, "signal/frontier_coverage_10/group_std_mean": 0.07828292399644851, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01436650361865759, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008861927315592765, "signal/frontier_coverage_15/centered_abs_mean": 0.07821848094463349, "signal/frontier_coverage_15/group_std_mean": 0.09751923680305481, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01814446821808815, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011185242561623453, "signal/frontier_coverage_20/centered_abs_mean": 0.10835776478052139, "signal/frontier_coverage_20/group_std_mean": 0.13570416867733, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02512829452753067, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015495160361751914, "signal/frontier_coverage_25/centered_abs_mean": 0.14593692719936371, "signal/frontier_coverage_25/group_std_mean": 0.18364288806915283, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03382998965680599, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002086897986009717, "signal/frontier_coverage_5/centered_abs_mean": 0.1591697096824646, "signal/frontier_coverage_5/group_std_mean": 0.20380557775497438, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036816838383674624, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002276126807555556, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32349973320961, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3918896377086639, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5253556430339813, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0323499746620655, "step": 615 }, { "calibration/aurc": 0.14598407216296222, "calibration/batch_distribution_entropy": 0.982388407039738, "calibration/buffer_distribution_entropy": 0.9813693991185566, "calibration/confidence_entropy": 0.489513506980086, "calibration/coverage@0%": 0.13854166666666667, "calibration/coverage@1%": 0.21770833333333334, "calibration/coverage@10%": 0.4604166666666667, "calibration/coverage@15%": 0.5911458333333334, "calibration/coverage@20%": 0.6604166666666667, "calibration/coverage@25%": 0.7458333333333333, "calibration/coverage@30%": 0.8333333333333334, "calibration/coverage@5%": 0.3848958333333333, "calibration/ece": 0.22271012485923425, "calibration/mean_confidence": 0.5231795626407657, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0004340277777777901, "completions/max_length": 2951.8, "completions/max_terminated_length": 2951.8, "completions/mean_length": 739.6912475585938, "completions/mean_terminated_length": 740.016650390625, "completions/min_length": 78.2, "completions/min_terminated_length": 144.4, "epoch": 1.489593880076499, "grad_norm": 0.0033593103289604187, "learning_rate": 2.5240384615384618e-06, "loss": 0.0135, "num_tokens": 1620771995.0, "reward": 1.0207280397415162, "reward_std": 0.1027398332953453, "rewards/accuracy_reward": 0.7306423664093018, "rewards/brier_reward": 0.8059651494026184, "rewards/confidence_uniqueness_reward": 0.950669014453888, "rewards/format_reward": 0.9995659708976745, "rewards/frontier_coverage_0": -0.008429169561713934, "rewards/frontier_coverage_1": -0.008429169561713934, "rewards/frontier_coverage_10": 0.03902908526360989, "rewards/frontier_coverage_15": 0.09024645537137985, "rewards/frontier_coverage_20": 0.1556310087442398, "rewards/frontier_coverage_25": 0.23286145329475402, "rewards/frontier_coverage_5": -0.008092107716947794, "rewards/frontier_entropy_batch_reward": -0.2708683729171753, "signal/accuracy_reward/centered_abs_mean": 0.12894422858953475, "signal/accuracy_reward/group_std_mean": 0.17072508335113526, "signal/accuracy_reward/group_zero_std_frac": 0.5111111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0046961665153504, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06447211429476737, "signal/advantage_abs_mean": 0.769278085231781, "signal/advantage_pre_scale_abs_mean": 0.07922997027635574, "signal/advantage_pre_scale_std": 0.12357212156057358, "signal/advantage_std": 0.9829211473464966, "signal/brier_reward/centered_abs_mean": 0.11856135725975037, "signal/brier_reward/group_std_mean": 0.1511201113462448, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18537597954273224, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011856135725975037, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014381918683648109, "signal/confidence_uniqueness_reward/group_std_mean": 0.018994222208857537, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02267582081258297, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014381919521838427, "signal/format_reward/centered_abs_mean": 0.0008300781133584678, "signal/format_reward/group_std_mean": 0.0021562909707427023, "signal/format_reward/group_zero_std_frac": 0.9888888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006034436263144016, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004150390566792339, "signal/frontier_coverage_0/centered_abs_mean": 0.17420557141304016, "signal/frontier_coverage_0/group_std_mean": 0.22342933118343353, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03900505751371384, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002491139620542526, "signal/frontier_coverage_1/centered_abs_mean": 0.17420557141304016, "signal/frontier_coverage_1/group_std_mean": 0.22342933118343353, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03900505751371384, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002491139620542526, "signal/frontier_coverage_10/centered_abs_mean": 0.06426115781068802, "signal/frontier_coverage_10/group_std_mean": 0.08098717033863068, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014438208192586899, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009189345757476986, "signal/frontier_coverage_15/centered_abs_mean": 0.07448446601629258, "signal/frontier_coverage_15/group_std_mean": 0.09238378256559372, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016728433780372143, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010651277843862772, "signal/frontier_coverage_20/centered_abs_mean": 0.10093164592981338, "signal/frontier_coverage_20/group_std_mean": 0.12564358860254288, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02264065630733967, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014433225616812706, "signal/frontier_coverage_25/centered_abs_mean": 0.13600083589553832, "signal/frontier_coverage_25/group_std_mean": 0.17014565765857698, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0304828904569149, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019448119215667247, "signal/frontier_coverage_5/centered_abs_mean": 0.17328290045261383, "signal/frontier_coverage_5/group_std_mean": 0.22228720486164094, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038798777014017106, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002477945387363434, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3208712935447693, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3883806228637695, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5037640929222107, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03208713196218014, "step": 620 }, { "calibration/aurc": 0.13299089792623325, "calibration/batch_distribution_entropy": 0.9681063933987465, "calibration/buffer_distribution_entropy": 0.9824702871929866, "calibration/confidence_entropy": 0.49371973292672955, "calibration/coverage@0%": 0.06255439512619669, "calibration/coverage@1%": 0.06255439512619669, "calibration/coverage@10%": 0.45554694299390774, "calibration/coverage@15%": 0.6134655134899913, "calibration/coverage@20%": 0.7713337684943429, "calibration/coverage@25%": 0.8723958333333333, "calibration/coverage@30%": 0.9348958333333333, "calibration/coverage@5%": 0.40026381636205394, "calibration/ece": 0.1668714719484334, "calibration/mean_confidence": 0.5829425275076152, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000868055555555558, "completions/max_length": 2621.2, "completions/max_terminated_length": 2621.2, "completions/mean_length": 679.5066040039062, "completions/mean_terminated_length": 680.1067504882812, "completions/min_length": 29.8, "completions/min_terminated_length": 123.8, "epoch": 1.501593730078374, "grad_norm": 0.003809634130448103, "learning_rate": 2.4939903846153847e-06, "loss": 0.006, "num_tokens": 1631696295.0, "reward": 1.0135034561157226, "reward_std": 0.1094308227300644, "rewards/accuracy_reward": 0.7105902791023254, "rewards/brier_reward": 0.8304842948913574, "rewards/confidence_uniqueness_reward": 0.9500762939453125, "rewards/format_reward": 0.9991319298744201, "rewards/frontier_coverage_0": 0.025595280434936286, "rewards/frontier_coverage_1": 0.025595280434936286, "rewards/frontier_coverage_10": 0.046889835596084596, "rewards/frontier_coverage_15": 0.0962829276919365, "rewards/frontier_coverage_20": 0.1629137009382248, "rewards/frontier_coverage_25": 0.24148197174072267, "rewards/frontier_coverage_5": 0.02585846995934844, "rewards/frontier_entropy_batch_reward": -0.2834572374820709, "signal/accuracy_reward/centered_abs_mean": 0.13683811128139495, "signal/accuracy_reward/group_std_mean": 0.18167079985141754, "signal/accuracy_reward/group_zero_std_frac": 0.4777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.055351686477661, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06841905564069747, "signal/advantage_abs_mean": 0.7610543847084046, "signal/advantage_pre_scale_abs_mean": 0.08355010896921158, "signal/advantage_pre_scale_std": 0.13148944824934006, "signal/advantage_std": 0.9829296469688416, "signal/brier_reward/centered_abs_mean": 0.10652481764554977, "signal/brier_reward/group_std_mean": 0.13742452561855317, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16548333764076234, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010652481578290462, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015012368932366372, "signal/confidence_uniqueness_reward/group_std_mean": 0.0208422277122736, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0234722301363945, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015012368094176054, "signal/format_reward/centered_abs_mean": 0.001649305538740009, "signal/format_reward/group_std_mean": 0.004259948246181011, "signal/format_reward/group_zero_std_frac": 0.9777777671813965, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013225546292960643, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008246527693700045, "signal/frontier_coverage_0/centered_abs_mean": 0.15693804621696472, "signal/frontier_coverage_0/group_std_mean": 0.20245675444602967, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03491860181093216, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002244214015081525, "signal/frontier_coverage_1/centered_abs_mean": 0.15693804621696472, "signal/frontier_coverage_1/group_std_mean": 0.20245675444602967, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03491860181093216, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002244214015081525, "signal/frontier_coverage_10/centered_abs_mean": 0.05752530992031098, "signal/frontier_coverage_10/group_std_mean": 0.07304619401693344, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012821021303534508, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008226120029576123, "signal/frontier_coverage_15/centered_abs_mean": 0.07470366805791855, "signal/frontier_coverage_15/group_std_mean": 0.09323683530092239, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016622103564441205, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010682624764740468, "signal/frontier_coverage_20/centered_abs_mean": 0.1053359866142273, "signal/frontier_coverage_20/group_std_mean": 0.13195045590400695, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02341417223215103, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015063045779243112, "signal/frontier_coverage_25/centered_abs_mean": 0.1433310478925705, "signal/frontier_coverage_25/group_std_mean": 0.18033002614974974, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03183464221656322, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020496340468525885, "signal/frontier_coverage_5/centered_abs_mean": 0.15589080452919007, "signal/frontier_coverage_5/group_std_mean": 0.20115500092506408, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03468661829829216, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00222923846449703, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32396227717399595, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39139692187309266, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5038922011852265, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03239622823894024, "step": 625 }, { "calibration/aurc": 0.18900542943636528, "calibration/batch_distribution_entropy": 0.9588022252491543, "calibration/buffer_distribution_entropy": 0.9832399180795719, "calibration/confidence_entropy": 0.49511201168554775, "calibration/coverage@0%": 0.027083333333333338, "calibration/coverage@1%": 0.027083333333333338, "calibration/coverage@10%": 0.20416666666666666, "calibration/coverage@15%": 0.3796875, "calibration/coverage@20%": 0.5932291666666667, "calibration/coverage@25%": 0.7838541666666667, "calibration/coverage@30%": 0.8895833333333334, "calibration/coverage@5%": 0.0640625, "calibration/ece": 0.13272444218749999, "calibration/mean_confidence": 0.5808325671875, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0002604166666666741, "completions/max_length": 2794.4, "completions/max_terminated_length": 2794.4, "completions/mean_length": 645.9603515625, "completions/mean_terminated_length": 646.1424438476563, "completions/min_length": 102.4, "completions/min_terminated_length": 128.4, "epoch": 1.513593580080249, "grad_norm": 0.0036048083566129208, "learning_rate": 2.463942307692308e-06, "loss": 0.0061, "num_tokens": 1642238590.0, "reward": 1.0172206521034242, "reward_std": 0.09594685435295106, "rewards/accuracy_reward": 0.7276041626930236, "rewards/brier_reward": 0.8266344904899597, "rewards/confidence_uniqueness_reward": 0.9474079608917236, "rewards/format_reward": 0.9997395873069763, "rewards/frontier_coverage_0": 0.016538088396191596, "rewards/frontier_coverage_1": 0.016538088396191596, "rewards/frontier_coverage_10": 0.045445504039525984, "rewards/frontier_coverage_15": 0.09864003360271453, "rewards/frontier_coverage_20": 0.1674270361661911, "rewards/frontier_coverage_25": 0.24783487915992736, "rewards/frontier_coverage_5": 0.016732219979166983, "rewards/frontier_entropy_batch_reward": -0.3256641149520874, "signal/accuracy_reward/centered_abs_mean": 0.10422092080116271, "signal/accuracy_reward/group_std_mean": 0.14965912103652954, "signal/accuracy_reward/group_zero_std_frac": 0.522222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8708725094795227, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05211046040058136, "signal/advantage_abs_mean": 0.7522642731666564, "signal/advantage_pre_scale_abs_mean": 0.07094881534576417, "signal/advantage_pre_scale_std": 0.11598304659128189, "signal/advantage_std": 0.982811689376831, "signal/brier_reward/centered_abs_mean": 0.10578378438949584, "signal/brier_reward/group_std_mean": 0.1379517912864685, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17751872837543486, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010578378662467003, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016063277050852774, "signal/confidence_uniqueness_reward/group_std_mean": 0.020402568578720092, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026994920149445534, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001606327760964632, "signal/format_reward/centered_abs_mean": 0.000493706576526165, "signal/format_reward/group_std_mean": 0.0011741982772946358, "signal/format_reward/group_zero_std_frac": 0.9944444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.004082060605287552, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0002468532882630825, "signal/frontier_coverage_0/centered_abs_mean": 0.1364389628171921, "signal/frontier_coverage_0/group_std_mean": 0.1777593731880188, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03274801447987556, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019510772079229354, "signal/frontier_coverage_1/centered_abs_mean": 0.1364389628171921, "signal/frontier_coverage_1/group_std_mean": 0.1777593731880188, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03274801447987556, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019510772079229354, "signal/frontier_coverage_10/centered_abs_mean": 0.056586884707212445, "signal/frontier_coverage_10/group_std_mean": 0.07152153998613357, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01358992587774992, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008091924944892525, "signal/frontier_coverage_15/centered_abs_mean": 0.07719572931528092, "signal/frontier_coverage_15/group_std_mean": 0.09610024690628052, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018524457514286042, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011038989294320344, "signal/frontier_coverage_20/centered_abs_mean": 0.10774560272693634, "signal/frontier_coverage_20/group_std_mean": 0.135586416721344, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02583777755498886, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015407620463520288, "signal/frontier_coverage_25/centered_abs_mean": 0.14388639628887176, "signal/frontier_coverage_25/group_std_mean": 0.1827654093503952, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03448529541492462, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002057575364597142, "signal/frontier_coverage_5/centered_abs_mean": 0.1355312928557396, "signal/frontier_coverage_5/group_std_mean": 0.1766209274530411, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03252986185252667, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019380974117666483, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3347103834152222, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3980586588382721, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5616107821464539, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033471040055155755, "step": 630 }, { "calibration/aurc": 0.10900926811330347, "calibration/batch_distribution_entropy": 0.9646768562626127, "calibration/buffer_distribution_entropy": 0.983300626563356, "calibration/confidence_entropy": 0.47932893499263984, "calibration/coverage@0%": 0.028125, "calibration/coverage@1%": 0.028125, "calibration/coverage@10%": 0.5963541666666666, "calibration/coverage@15%": 0.771875, "calibration/coverage@20%": 0.8604166666666666, "calibration/coverage@25%": 0.9265625, "calibration/coverage@30%": 0.965625, "calibration/coverage@5%": 0.3578125, "calibration/ece": 0.16915577300284446, "calibration/mean_confidence": 0.5765589813361778, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005208333333333482, "completions/max_length": 3458.0, "completions/max_terminated_length": 3458.0, "completions/mean_length": 707.9698120117188, "completions/mean_terminated_length": 708.3488525390625, "completions/min_length": 29.6, "completions/min_terminated_length": 142.2, "epoch": 1.525593430082124, "grad_norm": 0.0034078743774443865, "learning_rate": 2.433894230769231e-06, "loss": 0.0153, "num_tokens": 1653519362.0, "reward": 1.0050111770629884, "reward_std": 0.104340460896492, "rewards/accuracy_reward": 0.6942708373069764, "rewards/brier_reward": 0.8122452735900879, "rewards/confidence_uniqueness_reward": 0.9519274234771729, "rewards/format_reward": 0.9994791507720947, "rewards/frontier_coverage_0": 0.02422009650617838, "rewards/frontier_coverage_1": 0.02422009650617838, "rewards/frontier_coverage_10": 0.04215832352638245, "rewards/frontier_coverage_15": 0.08379101604223252, "rewards/frontier_coverage_20": 0.14118048250675203, "rewards/frontier_coverage_25": 0.21112163364887238, "rewards/frontier_coverage_5": 0.02440500818192959, "rewards/frontier_entropy_batch_reward": -0.26161783635616304, "signal/accuracy_reward/centered_abs_mean": 0.13336588591337203, "signal/accuracy_reward/group_std_mean": 0.17580996751785277, "signal/accuracy_reward/group_zero_std_frac": 0.5027777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0433040618896485, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06668294295668602, "signal/advantage_abs_mean": 0.7685012340545654, "signal/advantage_pre_scale_abs_mean": 0.08071336597204208, "signal/advantage_pre_scale_std": 0.12523564100265502, "signal/advantage_std": 0.9829237222671509, "signal/brier_reward/centered_abs_mean": 0.11492740660905838, "signal/brier_reward/group_std_mean": 0.14776553511619567, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18006122708320618, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011492740735411644, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.013762745633721351, "signal/confidence_uniqueness_reward/group_std_mean": 0.01849808692932129, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021554048731923105, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0013762745773419737, "signal/format_reward/centered_abs_mean": 0.0009982638759538532, "signal/format_reward/group_std_mean": 0.002647337270900607, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0077989035286009315, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004991319379769266, "signal/frontier_coverage_0/centered_abs_mean": 0.1750947952270508, "signal/frontier_coverage_0/group_std_mean": 0.22593581676483154, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03923875316977501, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025038556195795536, "signal/frontier_coverage_1/centered_abs_mean": 0.1750947952270508, "signal/frontier_coverage_1/group_std_mean": 0.22593581676483154, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03923875316977501, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025038556195795536, "signal/frontier_coverage_10/centered_abs_mean": 0.06284371763467789, "signal/frontier_coverage_10/group_std_mean": 0.08010470569133758, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014089632220566273, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008986651315353811, "signal/frontier_coverage_15/centered_abs_mean": 0.06957742124795914, "signal/frontier_coverage_15/group_std_mean": 0.08731958866119385, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015588978677988053, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009949571453034879, "signal/frontier_coverage_20/centered_abs_mean": 0.09489159286022186, "signal/frontier_coverage_20/group_std_mean": 0.119244185090065, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.021246416494250298, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013569497969001532, "signal/frontier_coverage_25/centered_abs_mean": 0.1292428568005562, "signal/frontier_coverage_25/group_std_mean": 0.1630004495382309, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.028931079804897307, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018481727223843337, "signal/frontier_coverage_5/centered_abs_mean": 0.17432405054569244, "signal/frontier_coverage_5/group_std_mean": 0.22496304512023926, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03906645104289055, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002492833789438009, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31597875356674193, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38909701704978944, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49435396790504454, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03159787617623806, "step": 635 }, { "calibration/aurc": 0.132739511797332, "calibration/batch_distribution_entropy": 0.9625923694508393, "calibration/buffer_distribution_entropy": 0.9831253583300397, "calibration/confidence_entropy": 0.48128109715281264, "calibration/coverage@0%": 0.07874510443864229, "calibration/coverage@1%": 0.12156494778067883, "calibration/coverage@10%": 0.42284323324630113, "calibration/coverage@15%": 0.6694476174934726, "calibration/coverage@20%": 0.7909432114882506, "calibration/coverage@25%": 0.8618336597040905, "calibration/coverage@30%": 0.9082435813751089, "calibration/coverage@5%": 0.31388571583986075, "calibration/ece": 0.14269792804612705, "calibration/mean_confidence": 0.5975095459394038, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001128472222222232, "completions/max_length": 3077.8, "completions/max_terminated_length": 3077.8, "completions/mean_length": 766.5240600585937, "completions/mean_terminated_length": 767.429052734375, "completions/min_length": 20.2, "completions/min_terminated_length": 126.8, "epoch": 1.5375932800839989, "grad_norm": 0.003272005822509527, "learning_rate": 2.403846153846154e-06, "loss": 0.0027, "num_tokens": 1665426583.0, "reward": 1.0096034407615662, "reward_std": 0.09776319563388824, "rewards/accuracy_reward": 0.7085069417953491, "rewards/brier_reward": 0.8340772271156311, "rewards/confidence_uniqueness_reward": 0.9471764445304871, "rewards/format_reward": 0.9988715291023255, "rewards/frontier_coverage_0": 0.0319485223852098, "rewards/frontier_coverage_1": 0.0319485223852098, "rewards/frontier_coverage_10": 0.04951897189021111, "rewards/frontier_coverage_15": 0.10320504754781723, "rewards/frontier_coverage_20": 0.17310980558395386, "rewards/frontier_coverage_25": 0.25380962789058686, "rewards/frontier_coverage_5": 0.032035707216709855, "rewards/frontier_entropy_batch_reward": -0.3187188684940338, "signal/accuracy_reward/centered_abs_mean": 0.11482204794883728, "signal/accuracy_reward/group_std_mean": 0.15133090913295746, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0013312339782714, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05741102397441864, "signal/advantage_abs_mean": 0.7768993496894836, "signal/advantage_pre_scale_abs_mean": 0.07542656362056732, "signal/advantage_pre_scale_std": 0.12090198844671249, "signal/advantage_std": 0.9827402710914612, "signal/brier_reward/centered_abs_mean": 0.10435597896575928, "signal/brier_reward/group_std_mean": 0.13554692268371582, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18235518038272858, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010435598157346248, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016541999764740467, "signal/confidence_uniqueness_reward/group_std_mean": 0.023310190439224242, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0289510115981102, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016541999299079179, "signal/format_reward/centered_abs_mean": 0.0021213107858784495, "signal/format_reward/group_std_mean": 0.005352780409157276, "signal/format_reward/group_zero_std_frac": 0.9722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.018210524041205645, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010606553929392248, "signal/frontier_coverage_0/centered_abs_mean": 0.15110780000686647, "signal/frontier_coverage_0/group_std_mean": 0.19436927139759064, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03769223988056183, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021608415991067886, "signal/frontier_coverage_1/centered_abs_mean": 0.15110780000686647, "signal/frontier_coverage_1/group_std_mean": 0.19436927139759064, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03769223988056183, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021608415991067886, "signal/frontier_coverage_10/centered_abs_mean": 0.060875777155160904, "signal/frontier_coverage_10/group_std_mean": 0.07657611966133118, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015234320424497128, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008705236483365298, "signal/frontier_coverage_15/centered_abs_mean": 0.07580193877220154, "signal/frontier_coverage_15/group_std_mean": 0.09422909021377564, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019024584069848062, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010839677881449461, "signal/frontier_coverage_20/centered_abs_mean": 0.10344732105731964, "signal/frontier_coverage_20/group_std_mean": 0.130024753510952, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02597166895866394, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014792966656386852, "signal/frontier_coverage_25/centered_abs_mean": 0.1384286493062973, "signal/frontier_coverage_25/group_std_mean": 0.1750235766172409, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.034739000350236894, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001979529578238726, "signal/frontier_coverage_5/centered_abs_mean": 0.15033363848924636, "signal/frontier_coverage_5/group_std_mean": 0.19340720176696777, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03749907538294792, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021497709909453987, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32275074124336245, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3881240785121918, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5654678821563721, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03227507434785366, "step": 640 }, { "calibration/aurc": 0.1632617220818313, "calibration/batch_distribution_entropy": 0.9397674467638419, "calibration/buffer_distribution_entropy": 0.9831640113121327, "calibration/confidence_entropy": 0.49047782580285765, "calibration/coverage@0%": 0.020836053089643168, "calibration/coverage@1%": 0.020836053089643168, "calibration/coverage@10%": 0.2645860530896432, "calibration/coverage@15%": 0.6326425152306354, "calibration/coverage@20%": 0.7118798955613578, "calibration/coverage@25%": 0.7827485857267188, "calibration/coverage@30%": 0.9848645561357703, "calibration/coverage@5%": 0.10208605308964318, "calibration/ece": 0.1834517613740209, "calibration/mean_confidence": 0.6010985707734987, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006076388888889061, "completions/max_length": 3129.0, "completions/max_terminated_length": 3129.0, "completions/mean_length": 815.459033203125, "completions/mean_terminated_length": 815.9496215820312, "completions/min_length": 0.0, "completions/min_terminated_length": 161.0, "epoch": 1.5495931300858738, "grad_norm": 0.0032203816808760166, "learning_rate": 2.373798076923077e-06, "loss": 0.0031, "num_tokens": 1677919711.0, "reward": 1.0256015539169312, "reward_std": 0.09568341672420502, "rewards/accuracy_reward": 0.7515625, "rewards/brier_reward": 0.8331769466400146, "rewards/confidence_uniqueness_reward": 0.9456747055053711, "rewards/format_reward": 0.9993923544883728, "rewards/frontier_coverage_0": 0.0019667490385472776, "rewards/frontier_coverage_1": 0.0019667490385472776, "rewards/frontier_coverage_10": 0.04397192746400833, "rewards/frontier_coverage_15": 0.10374006181955338, "rewards/frontier_coverage_20": 0.17997487783432006, "rewards/frontier_coverage_25": 0.2689637690782547, "rewards/frontier_coverage_5": 0.002235945500433445, "rewards/frontier_entropy_batch_reward": -0.36381399631500244, "signal/accuracy_reward/centered_abs_mean": 0.10557725727558136, "signal/accuracy_reward/group_std_mean": 0.14509375542402267, "signal/accuracy_reward/group_zero_std_frac": 0.5611111164093018, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.873564088344574, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05278862863779068, "signal/advantage_abs_mean": 0.7705256938934326, "signal/advantage_pre_scale_abs_mean": 0.07288682162761688, "signal/advantage_pre_scale_std": 0.1167471945285797, "signal/advantage_std": 0.9828287601470947, "signal/brier_reward/centered_abs_mean": 0.10486017912626266, "signal/brier_reward/group_std_mean": 0.13558290153741837, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17376516461372377, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01048601809889078, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015681835077703, "signal/confidence_uniqueness_reward/group_std_mean": 0.02083849869668484, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026080520078539848, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015681835589930414, "signal/format_reward/centered_abs_mean": 0.0011447482742369176, "signal/format_reward/group_std_mean": 0.0027868092060089112, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.00944533757865429, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0005723741371184588, "signal/frontier_coverage_0/centered_abs_mean": 0.13758010864257814, "signal/frontier_coverage_0/group_std_mean": 0.17860827147960662, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03260061703622341, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019673954462632536, "signal/frontier_coverage_1/centered_abs_mean": 0.13758010864257814, "signal/frontier_coverage_1/group_std_mean": 0.17860827147960662, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03260061703622341, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019673954462632536, "signal/frontier_coverage_10/centered_abs_mean": 0.05716334953904152, "signal/frontier_coverage_10/group_std_mean": 0.0721943661570549, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013553774170577525, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008174359099939466, "signal/frontier_coverage_15/centered_abs_mean": 0.08003035187721252, "signal/frontier_coverage_15/group_std_mean": 0.09915542453527451, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01901390254497528, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011444339528679849, "signal/frontier_coverage_20/centered_abs_mean": 0.11281442493200303, "signal/frontier_coverage_20/group_std_mean": 0.14117856323719025, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026818398758769034, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016132463002577423, "signal/frontier_coverage_25/centered_abs_mean": 0.15117188692092895, "signal/frontier_coverage_25/group_std_mean": 0.19031570255756378, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0359354741871357, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002161757950671017, "signal/frontier_coverage_5/centered_abs_mean": 0.13697426319122313, "signal/frontier_coverage_5/group_std_mean": 0.17787103354930878, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032456709817051885, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019587320508435368, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3425477683544159, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40490528345108034, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5694766044616699, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034254778921604154, "step": 645 }, { "calibration/aurc": 0.19994838141742183, "calibration/batch_distribution_entropy": 0.9632098414546952, "calibration/buffer_distribution_entropy": 0.9829315610905208, "calibration/confidence_entropy": 0.4787305902270136, "calibration/coverage@0%": 0.053234075043630015, "calibration/coverage@1%": 0.053234075043630015, "calibration/coverage@10%": 0.23770451570680629, "calibration/coverage@15%": 0.33285067626527054, "calibration/coverage@20%": 0.5511071116928448, "calibration/coverage@25%": 0.7573271160558463, "calibration/coverage@30%": 0.8289839659685864, "calibration/coverage@5%": 0.1424956369982548, "calibration/ece": 0.18254347376199828, "calibration/mean_confidence": 0.49279760826243457, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00277777777777779, "completions/max_length": 3912.0, "completions/max_terminated_length": 3912.0, "completions/mean_length": 1008.4895629882812, "completions/mean_terminated_length": 1011.3613891601562, "completions/min_length": 0.0, "completions/min_terminated_length": 169.2, "epoch": 1.5615929800877488, "grad_norm": 0.0029296150896698236, "learning_rate": 2.3437500000000002e-06, "loss": 0.0108, "num_tokens": 1692632999.0, "reward": 1.0104209780693054, "reward_std": 0.10768526047468185, "rewards/accuracy_reward": 0.7127604126930237, "rewards/brier_reward": 0.8133676528930665, "rewards/confidence_uniqueness_reward": 0.9476563215255738, "rewards/format_reward": 0.9972222328186036, "rewards/frontier_coverage_0": 0.0160905129625462, "rewards/frontier_coverage_1": 0.0160905129625462, "rewards/frontier_coverage_10": 0.04221442565321922, "rewards/frontier_coverage_15": 0.08863531947135925, "rewards/frontier_coverage_20": 0.15275568068027495, "rewards/frontier_coverage_25": 0.23076119422912597, "rewards/frontier_coverage_5": 0.016149751842021942, "rewards/frontier_entropy_batch_reward": -0.2871933221817017, "signal/accuracy_reward/centered_abs_mean": 0.13562825918197632, "signal/accuracy_reward/group_std_mean": 0.1758606731891632, "signal/accuracy_reward/group_zero_std_frac": 0.5083333492279053, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0496046185493468, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06781412959098816, "signal/advantage_abs_mean": 0.7706871032714844, "signal/advantage_pre_scale_abs_mean": 0.08306800574064255, "signal/advantage_pre_scale_std": 0.13049945682287217, "signal/advantage_std": 0.9829372525215149, "signal/brier_reward/centered_abs_mean": 0.12161406874656677, "signal/brier_reward/group_std_mean": 0.1559804707765579, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1888038247823715, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012161407060921193, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018055187538266182, "signal/confidence_uniqueness_reward/group_std_mean": 0.026942530646920204, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02799047380685806, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018055187771096825, "signal/format_reward/centered_abs_mean": 0.005078125023283064, "signal/format_reward/group_std_mean": 0.010953563638031483, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03934686332941055, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002539062511641532, "signal/frontier_coverage_0/centered_abs_mean": 0.18165695369243623, "signal/frontier_coverage_0/group_std_mean": 0.23133923709392548, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04036081805825233, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025976944249123334, "signal/frontier_coverage_1/centered_abs_mean": 0.18165695369243623, "signal/frontier_coverage_1/group_std_mean": 0.23133923709392548, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04036081805825233, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025976944249123334, "signal/frontier_coverage_10/centered_abs_mean": 0.06570319607853889, "signal/frontier_coverage_10/group_std_mean": 0.08306444734334946, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014596488140523434, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009395557222887874, "signal/frontier_coverage_15/centered_abs_mean": 0.07579767853021621, "signal/frontier_coverage_15/group_std_mean": 0.09315522462129593, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016809598729014396, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010839068330824376, "signal/frontier_coverage_20/centered_abs_mean": 0.10389769673347474, "signal/frontier_coverage_20/group_std_mean": 0.12809243351221083, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023021703585982323, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014857371104881167, "signal/frontier_coverage_25/centered_abs_mean": 0.1412452608346939, "signal/frontier_coverage_25/group_std_mean": 0.1752469062805176, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031283880770206454, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020198072539642452, "signal/frontier_coverage_5/centered_abs_mean": 0.1812539279460907, "signal/frontier_coverage_5/group_std_mean": 0.23083524107933046, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.040271298587322236, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0025919311214238406, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3169364869594574, "signal/frontier_entropy_batch_reward/group_std_mean": 0.384937995672226, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.491671484708786, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031693648919463156, "step": 650 }, { "epoch": 1.5615929800877488, "eval_calibration/aurc": 0.1258854326762919, "eval_calibration/batch_distribution_entropy": 0.92262165151983, "eval_calibration/buffer_distribution_entropy": 0.9839670771333515, "eval_calibration/confidence_entropy": 0.5051808162676946, "eval_calibration/coverage@0%": 0.22916666666666666, "eval_calibration/coverage@1%": 0.22916666666666666, "eval_calibration/coverage@10%": 0.546875, "eval_calibration/coverage@15%": 0.6302083333333334, "eval_calibration/coverage@20%": 0.8125, "eval_calibration/coverage@25%": 0.9479166666666666, "eval_calibration/coverage@30%": 0.9635416666666666, "eval_calibration/coverage@5%": 0.421875, "eval_calibration/ece": 0.28119635416666666, "eval_calibration/mean_confidence": 0.5181848958333334, "eval_completions/clipped_ratio": 0.006944444444444457, "eval_completions/max_length": 3462.6666666666665, "eval_completions/max_terminated_length": 3462.6666666666665, "eval_completions/mean_length": 1139.494120279948, "eval_completions/mean_terminated_length": 1147.5079345703125, "eval_completions/min_length": 89.66666666666667, "eval_completions/min_terminated_length": 220.0, "eval_loss": 0.0, "eval_num_tokens": 1692632999.0, "eval_reward": 0.9208190242449442, "eval_reward_std": 0.23059933632612228, "eval_rewards/accuracy_reward": 0.6909722288449606, "eval_rewards/brier_reward": 0.8101545870304108, "eval_rewards/confidence_uniqueness_reward": 0.8898043135801951, "eval_rewards/format_reward": 0.9921875099341074, "eval_rewards/frontier_coverage_0": 0.03612114832503721, "eval_rewards/frontier_coverage_1": 0.03612114832503721, "eval_rewards/frontier_coverage_10": 0.04670971849312385, "eval_rewards/frontier_coverage_15": 0.08305741598208745, "eval_rewards/frontier_coverage_20": 0.14025516683856645, "eval_rewards/frontier_coverage_25": 0.21333598345518112, "eval_rewards/frontier_coverage_5": 0.03614697029115632, "eval_rewards/frontier_entropy_batch_reward": -0.9921875099341074, "eval_runtime": 214.7999, "eval_samples_per_second": 4.655, "eval_signal/accuracy_reward/centered_abs_mean": 0.4059244791666667, "eval_signal/accuracy_reward/group_std_mean": 0.4549813171227773, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8949279487133026, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20296223958333334, "eval_signal/advantage_abs_mean": 0.8530746897061666, "eval_signal/advantage_pre_scale_abs_mean": 0.1983613446354866, "eval_signal/advantage_pre_scale_std": 0.22935798267523447, "eval_signal/advantage_std": 0.9863868057727814, "eval_signal/brier_reward/centered_abs_mean": 0.17868993182977042, "eval_signal/brier_reward/group_std_mean": 0.24043517063061395, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07871770237882932, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01786899333819747, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04949278508623441, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07535891359051068, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021756678509215515, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004949278353403012, "eval_signal/format_reward/centered_abs_mean": 0.015028211753815413, "eval_signal/format_reward/group_std_mean": 0.04120476512859265, "eval_signal/format_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0320138872290651, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007514105876907706, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.32204001148541767, "eval_signal/frontier_coverage_0/group_std_mean": 0.4291209429502487, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.02039930286506812, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.004605172357211511, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.32204001148541767, "eval_signal/frontier_coverage_1/group_std_mean": 0.4291209429502487, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.02039930286506812, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.004605172357211511, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.092753649999698, "eval_signal/frontier_coverage_10/group_std_mean": 0.12686272462209067, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0058677659059564275, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013263771737304826, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.09765455995996793, "eval_signal/frontier_coverage_15/group_std_mean": 0.12701285382111868, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00617311514603595, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013964602064030867, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.16553593426942825, "eval_signal/frontier_coverage_20/group_std_mean": 0.20874186108509699, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.010455410151431957, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002367163930709163, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.24852682650089264, "eval_signal/frontier_coverage_25/group_std_mean": 0.3072594503561656, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01568988710641861, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0035539336192111173, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.32129410405953723, "eval_signal/frontier_coverage_5/group_std_mean": 0.42822254200776416, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.020351968084772427, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0045945055317133665, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.015028211753815413, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.04120476512859265, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.7777778009573618, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.006402777663121621, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0015028212607527773, "eval_steps_per_second": 0.028, "step": 650 }, { "epoch": 1.5615929800877488, "step": 650, "train_probe_calibration/aurc": 0.1050277420453154, "train_probe_calibration/batch_distribution_entropy": 0.92016610975414, "train_probe_calibration/buffer_distribution_entropy": 0.9841441810954885, "train_probe_calibration/confidence_entropy": 0.5080130716251837, "train_probe_calibration/coverage@0%": 0.3111559139784946, "train_probe_calibration/coverage@1%": 0.3111559139784946, "train_probe_calibration/coverage@10%": 0.5475470430107526, "train_probe_calibration/coverage@15%": 0.7043010752688171, "train_probe_calibration/coverage@20%": 0.8738239247311829, "train_probe_calibration/coverage@25%": 0.9529569892473119, "train_probe_calibration/coverage@30%": 0.9791666666666666, "train_probe_calibration/coverage@5%": 0.363239247311828, "train_probe_calibration/ece": 0.22604252352150536, "train_probe_calibration/mean_confidence": 0.5218310987903226, "train_probe_completions/clipped_ratio": 0.005208333333333315, "train_probe_completions/max_length": 3275.0, "train_probe_completions/max_terminated_length": 3275.0, "train_probe_completions/mean_length": 1149.7640787760417, "train_probe_completions/mean_terminated_length": 1155.8470865885417, "train_probe_completions/min_length": 91.83333333333333, "train_probe_completions/min_terminated_length": 190.16666666666666, "train_probe_loss": 0.0, "train_probe_num_tokens": 1692632999.0, "train_probe_reward": 0.9475771685441335, "train_probe_reward_std": 0.21386076509952545, "train_probe_rewards/accuracy_reward": 0.7439236144224802, "train_probe_rewards/brier_reward": 0.8085533181826273, "train_probe_rewards/confidence_uniqueness_reward": 0.8970864415168762, "train_probe_rewards/format_reward": 0.9947916567325592, "train_probe_rewards/frontier_coverage_0": -0.004265061909488092, "train_probe_rewards/frontier_coverage_1": -0.004265061909488092, "train_probe_rewards/frontier_coverage_10": 0.0401507547746102, "train_probe_rewards/frontier_coverage_15": 0.087270587682724, "train_probe_rewards/frontier_coverage_20": 0.15156510472297668, "train_probe_rewards/frontier_coverage_25": 0.23263747741778693, "train_probe_rewards/frontier_coverage_5": -0.004163547380206485, "train_probe_rewards/frontier_entropy_batch_reward": -0.9947916567325592, "train_probe_runtime": 207.3917, "train_probe_samples_per_second": 4.822, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.36865234375, "train_probe_signal/accuracy_reward/group_std_mean": 0.4333516408999761, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8816647529602051, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.184326171875, "train_probe_signal/advantage_abs_mean": 0.8192966183026632, "train_probe_signal/advantage_pre_scale_abs_mean": 0.1771583134929339, "train_probe_signal/advantage_pre_scale_std": 0.21326116969188055, "train_probe_signal/advantage_std": 0.9863499303658804, "train_probe_signal/brier_reward/centered_abs_mean": 0.17456327378749847, "train_probe_signal/brier_reward/group_std_mean": 0.2288526544968287, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.0834679293135802, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.017456327254573505, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04140195933481058, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.062413097048799195, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019774133029083412, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004140196174072723, "train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/format_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/format_reward/group_zero_std_frac": 0.8333333432674408, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022984805206457775, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.3087007204691569, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.42339272300402325, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.021187719888985157, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0044144203420728445, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.3087007204691569, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.42339272300402325, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.021187719888985157, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0044144203420728445, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.09169654672344525, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.12483824168642361, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006285120112200578, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0013112605665810406, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.09570210054516792, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.12248214582602183, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.006561545344690482, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013685400481335819, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.1610035002231598, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.19909277806679407, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011035082396119833, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0023023500107228756, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.23792067666848501, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.2904689262310664, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.016299090658624966, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0034022655648489795, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.3079666793346405, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.4224870850642522, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02113716086993615, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00440392301728328, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333432674408, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004596961506952842, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0010091145910943549, "train_probe_steps_per_second": 0.029 }, { "calibration/aurc": 0.08886764164430172, "calibration/batch_distribution_entropy": 0.9755997519212493, "calibration/buffer_distribution_entropy": 0.9844547186954102, "calibration/confidence_entropy": 0.47347940093345214, "calibration/coverage@0%": 0.2267192725398695, "calibration/coverage@1%": 0.28414543623541816, "calibration/coverage@10%": 0.6487030533794279, "calibration/coverage@15%": 0.7547508466709868, "calibration/coverage@20%": 0.8620347968052193, "calibration/coverage@25%": 0.9512310997257731, "calibration/coverage@30%": 0.9786096256684491, "calibration/coverage@5%": 0.4562793073317689, "calibration/ece": 0.2560435482025544, "calibration/mean_confidence": 0.4828333317576958, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013107638888888884, "completions/max_length": 4042.2, "completions/max_terminated_length": 4042.2, "completions/mean_length": 1245.147509765625, "completions/mean_terminated_length": 1262.2794189453125, "completions/min_length": 0.0, "completions/min_terminated_length": 214.0, "epoch": 1.5735928300896238, "grad_norm": 0.0022649674210697412, "learning_rate": 2.3137019230769236e-06, "loss": -0.0295, "num_tokens": 1710094378.0, "reward": 1.0154491662979126, "reward_std": 0.12489555925130844, "rewards/accuracy_reward": 0.735156238079071, "rewards/brier_reward": 0.7945853352546692, "rewards/confidence_uniqueness_reward": 0.9397654891014099, "rewards/format_reward": 0.9868923544883728, "rewards/frontier_coverage_0": -0.013256353419274091, "rewards/frontier_coverage_1": -0.013256353419274091, "rewards/frontier_coverage_10": 0.035689426213502885, "rewards/frontier_coverage_15": 0.08503572195768357, "rewards/frontier_coverage_20": 0.15036341547966003, "rewards/frontier_coverage_25": 0.23061252534389495, "rewards/frontier_coverage_5": -0.013073560688644648, "rewards/frontier_entropy_batch_reward": -0.2561847805976868, "signal/accuracy_reward/centered_abs_mean": 0.13704969435930253, "signal/accuracy_reward/group_std_mean": 0.18030084669589996, "signal/accuracy_reward/group_zero_std_frac": 0.4944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.976815402507782, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06852484717965127, "signal/advantage_abs_mean": 0.7512982010841369, "signal/advantage_pre_scale_abs_mean": 0.09129708409309387, "signal/advantage_pre_scale_std": 0.1536231279373169, "signal/advantage_std": 0.9830477714538575, "signal/brier_reward/centered_abs_mean": 0.12865829318761826, "signal/brier_reward/group_std_mean": 0.1670507937669754, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1841784566640854, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012865828722715378, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03297282736748457, "signal/confidence_uniqueness_reward/group_std_mean": 0.054193584248423576, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0467866700142622, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0032972827553749084, "signal/format_reward/centered_abs_mean": 0.02270507828798145, "signal/format_reward/group_std_mean": 0.04204718470573425, "signal/format_reward/group_zero_std_frac": 0.8333333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1594323130324483, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.011352539143990726, "signal/frontier_coverage_0/centered_abs_mean": 0.18524830043315887, "signal/frontier_coverage_0/group_std_mean": 0.23942944705486296, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03790371045470238, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002649050671607256, "signal/frontier_coverage_1/centered_abs_mean": 0.18524830043315887, "signal/frontier_coverage_1/group_std_mean": 0.23942944705486296, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03790371045470238, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002649050671607256, "signal/frontier_coverage_10/centered_abs_mean": 0.06545519232749938, "signal/frontier_coverage_10/group_std_mean": 0.08360625356435776, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013402448035776615, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009360092226415873, "signal/frontier_coverage_15/centered_abs_mean": 0.07041083574295044, "signal/frontier_coverage_15/group_std_mean": 0.08784731775522232, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014487495459616185, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001006874954327941, "signal/frontier_coverage_20/centered_abs_mean": 0.0951210230588913, "signal/frontier_coverage_20/group_std_mean": 0.1187201201915741, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0196021169424057, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001360230566933751, "signal/frontier_coverage_25/centered_abs_mean": 0.1295736938714981, "signal/frontier_coverage_25/group_std_mean": 0.16238105595111846, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02669799067080021, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018529037246480585, "signal/frontier_coverage_5/centered_abs_mean": 0.18478002846240998, "signal/frontier_coverage_5/group_std_mean": 0.2388526976108551, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03780748248100281, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002642354369163513, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3182023406028748, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3871363937854767, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45689463019371035, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03182023465633392, "step": 655 }, { "calibration/aurc": 0.09533119410246484, "calibration/batch_distribution_entropy": 0.9460154236206373, "calibration/buffer_distribution_entropy": 0.9849437384133127, "calibration/confidence_entropy": 0.4857657950035287, "calibration/coverage@0%": 0.1084363058159143, "calibration/coverage@1%": 0.11890462537514294, "calibration/coverage@10%": 0.5864796812562145, "calibration/coverage@15%": 0.8011386517845708, "calibration/coverage@20%": 0.9112986932632948, "calibration/coverage@25%": 0.9652162001735081, "calibration/coverage@30%": 0.9883977900552485, "calibration/coverage@5%": 0.3859965976927132, "calibration/ece": 0.15954822888034606, "calibration/mean_confidence": 0.6012927915175033, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029861111111111116, "completions/max_length": 4036.6, "completions/max_terminated_length": 4036.6, "completions/mean_length": 1316.7138916015624, "completions/mean_terminated_length": 1357.5852783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 223.2, "epoch": 1.5855926800914988, "grad_norm": 0.0021101038437336683, "learning_rate": 2.283653846153846e-06, "loss": -0.0947, "num_tokens": 1728357482.0, "reward": 0.9971134901046753, "reward_std": 0.1492151975631714, "rewards/accuracy_reward": 0.7189236044883728, "rewards/brier_reward": 0.8177730441093445, "rewards/confidence_uniqueness_reward": 0.9205069661140441, "rewards/format_reward": 0.9701388835906982, "rewards/frontier_coverage_0": 0.018412799527868628, "rewards/frontier_coverage_1": 0.018412799527868628, "rewards/frontier_coverage_10": 0.04661319591104984, "rewards/frontier_coverage_15": 0.10446333140134811, "rewards/frontier_coverage_20": 0.1796988695859909, "rewards/frontier_coverage_25": 0.26911273002624514, "rewards/frontier_coverage_5": 0.0184929336886853, "rewards/frontier_entropy_batch_reward": -0.30615225434303284, "signal/accuracy_reward/centered_abs_mean": 0.1266710117459297, "signal/accuracy_reward/group_std_mean": 0.175567626953125, "signal/accuracy_reward/group_zero_std_frac": 0.4638888895511627, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8586621403694152, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06333550587296485, "signal/advantage_abs_mean": 0.7222093343734741, "signal/advantage_pre_scale_abs_mean": 0.1041327103972435, "signal/advantage_pre_scale_std": 0.18754582107067108, "signal/advantage_std": 0.983126699924469, "signal/brier_reward/centered_abs_mean": 0.12975584119558334, "signal/brier_reward/group_std_mean": 0.17287274599075317, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1763177275657654, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012975584715604782, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05719666481018067, "signal/confidence_uniqueness_reward/group_std_mean": 0.09497420340776444, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07740743607282638, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005719666136428714, "signal/format_reward/centered_abs_mean": 0.04908854141831398, "signal/format_reward/group_std_mean": 0.08599354475736617, "signal/format_reward/group_zero_std_frac": 0.6722222208976746, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.331484454870224, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02454427070915699, "signal/frontier_coverage_0/centered_abs_mean": 0.14936257898807526, "signal/frontier_coverage_0/group_std_mean": 0.1966366797685623, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.029026806727051734, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021358849480748177, "signal/frontier_coverage_1/centered_abs_mean": 0.14936257898807526, "signal/frontier_coverage_1/group_std_mean": 0.1966366797685623, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.029026806727051734, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021358849480748177, "signal/frontier_coverage_10/centered_abs_mean": 0.05857866555452347, "signal/frontier_coverage_10/group_std_mean": 0.07446140795946121, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011402043513953686, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008376748883165419, "signal/frontier_coverage_15/centered_abs_mean": 0.07696539610624313, "signal/frontier_coverage_15/group_std_mean": 0.09492502957582474, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014994030632078648, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011006051674485207, "signal/frontier_coverage_20/centered_abs_mean": 0.1082698255777359, "signal/frontier_coverage_20/group_std_mean": 0.1335964471101761, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02107783704996109, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015482584713026881, "signal/frontier_coverage_25/centered_abs_mean": 0.1468362033367157, "signal/frontier_coverage_25/group_std_mean": 0.1822466194629669, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02857258655130863, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002099757781252265, "signal/frontier_coverage_5/centered_abs_mean": 0.1490258753299713, "signal/frontier_coverage_5/group_std_mean": 0.1962038218975067, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.028961464390158655, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021310700103640556, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.330526864528656, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39485923647880555, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.45007047057151794, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03305268660187721, "step": 660 }, { "calibration/aurc": 0.09601976406181807, "calibration/batch_distribution_entropy": 0.9514238208166249, "calibration/buffer_distribution_entropy": 0.9848316736675014, "calibration/confidence_entropy": 0.47648112021445366, "calibration/coverage@0%": 0.151898181384355, "calibration/coverage@1%": 0.2935527003347923, "calibration/coverage@10%": 0.5848106517141017, "calibration/coverage@15%": 0.7099468030690537, "calibration/coverage@20%": 0.8737425404944587, "calibration/coverage@25%": 0.9047229326513214, "calibration/coverage@30%": 0.9556436487638533, "calibration/coverage@5%": 0.5147975095628313, "calibration/ece": 0.2035095831173826, "calibration/mean_confidence": 0.6139547713848655, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.06501736111111112, "completions/max_length": 4070.4, "completions/max_terminated_length": 4070.4, "completions/mean_length": 1302.47275390625, "completions/mean_terminated_length": 1393.196337890625, "completions/min_length": 0.0, "completions/min_terminated_length": 194.0, "epoch": 1.5975925300933738, "grad_norm": 0.0020804195664823055, "learning_rate": 2.2536057692307694e-06, "loss": -0.1813, "num_tokens": 1746472944.0, "reward": 0.9607110142707824, "reward_std": 0.19843303859233857, "rewards/accuracy_reward": 0.6991319417953491, "rewards/brier_reward": 0.7799984812736511, "rewards/confidence_uniqueness_reward": 0.8859006881713867, "rewards/format_reward": 0.9348958492279053, "rewards/frontier_coverage_0": 0.005472905747592449, "rewards/frontier_coverage_1": 0.005472905747592449, "rewards/frontier_coverage_10": 0.04270212613046169, "rewards/frontier_coverage_15": 0.1018251746892929, "rewards/frontier_coverage_20": 0.17534002363681794, "rewards/frontier_coverage_25": 0.2618106693029404, "rewards/frontier_coverage_5": 0.0055509466677904126, "rewards/frontier_entropy_batch_reward": -0.314467066526413, "signal/accuracy_reward/centered_abs_mean": 0.15463324785232543, "signal/accuracy_reward/group_std_mean": 0.20535095334053038, "signal/accuracy_reward/group_zero_std_frac": 0.40277778506278994, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8738274693489074, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07731662392616272, "signal/advantage_abs_mean": 0.722335159778595, "signal/advantage_pre_scale_abs_mean": 0.14205503165721894, "signal/advantage_pre_scale_std": 0.24266441464424132, "signal/advantage_std": 0.983333969116211, "signal/brier_reward/centered_abs_mean": 0.152623775601387, "signal/brier_reward/group_std_mean": 0.2023110032081604, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17406882345676422, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015262378007173538, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.10098161250352859, "signal/confidence_uniqueness_reward/group_std_mean": 0.15298269987106322, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.11472240090370178, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.010098160803318023, "signal/format_reward/centered_abs_mean": 0.0970920130610466, "signal/format_reward/group_std_mean": 0.1495143711566925, "signal/format_reward/group_zero_std_frac": 0.49444445967674255, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5503052711486817, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0485460065305233, "signal/frontier_coverage_0/centered_abs_mean": 0.15222309529781342, "signal/frontier_coverage_0/group_std_mean": 0.19828734695911407, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.024861392751336097, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002176790265366435, "signal/frontier_coverage_1/centered_abs_mean": 0.15222309529781342, "signal/frontier_coverage_1/group_std_mean": 0.19828734695911407, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.024861392751336097, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002176790265366435, "signal/frontier_coverage_10/centered_abs_mean": 0.05907100513577461, "signal/frontier_coverage_10/group_std_mean": 0.07500900477170944, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.009708347730338573, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008447154192253947, "signal/frontier_coverage_15/centered_abs_mean": 0.07548296004533768, "signal/frontier_coverage_15/group_std_mean": 0.09401365518569946, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01242184229195118, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010794063098728656, "signal/frontier_coverage_20/centered_abs_mean": 0.10607408285140991, "signal/frontier_coverage_20/group_std_mean": 0.13229668736457825, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01742022316902876, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015168593730777503, "signal/frontier_coverage_25/centered_abs_mean": 0.14476778507232665, "signal/frontier_coverage_25/group_std_mean": 0.1815311759710312, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.023732788860797882, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020701792556792496, "signal/frontier_coverage_5/centered_abs_mean": 0.15183787047863007, "signal/frontier_coverage_5/group_std_mean": 0.19780696630477906, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02479843869805336, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002171281585469842, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3288115680217743, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39475221037864683, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.37834363579750063, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03288115821778774, "step": 665 }, { "calibration/aurc": 0.03806984555271315, "calibration/batch_distribution_entropy": 0.9203345601826488, "calibration/buffer_distribution_entropy": 0.9832592180175089, "calibration/confidence_entropy": 0.4811035553222265, "calibration/coverage@0%": 0.2287304922155279, "calibration/coverage@1%": 0.3258552462180947, "calibration/coverage@10%": 0.9037652661992339, "calibration/coverage@15%": 0.9642066683779698, "calibration/coverage@20%": 0.9946949602122016, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.7852124285822694, "calibration/ece": 0.19911374108801527, "calibration/mean_confidence": 0.6586080802205992, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03255208333333333, "completions/max_length": 4073.6, "completions/max_terminated_length": 4073.6, "completions/mean_length": 1383.228759765625, "completions/mean_terminated_length": 1428.3281982421875, "completions/min_length": 0.0, "completions/min_terminated_length": 273.6, "epoch": 1.6095923800952487, "grad_norm": 0.0021786438301205635, "learning_rate": 2.2235576923076924e-06, "loss": -0.0947, "num_tokens": 1765514843.0, "reward": 0.9914361715316773, "reward_std": 0.1526610553264618, "rewards/accuracy_reward": 0.7119791746139527, "rewards/brier_reward": 0.8093498826026917, "rewards/confidence_uniqueness_reward": 0.918257987499237, "rewards/format_reward": 0.9673611164093018, "rewards/frontier_coverage_0": 0.01678692139685154, "rewards/frontier_coverage_1": 0.01678692139685154, "rewards/frontier_coverage_10": 0.04643819592893124, "rewards/frontier_coverage_15": 0.10272331386804581, "rewards/frontier_coverage_20": 0.17622337937355043, "rewards/frontier_coverage_25": 0.2643666982650757, "rewards/frontier_coverage_5": 0.01684805955737829, "rewards/frontier_entropy_batch_reward": -0.3014924913644791, "signal/accuracy_reward/centered_abs_mean": 0.1404839426279068, "signal/accuracy_reward/group_std_mean": 0.18494315445423126, "signal/accuracy_reward/group_zero_std_frac": 0.475, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.964840543270111, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0702419713139534, "signal/advantage_abs_mean": 0.7429413557052612, "signal/advantage_pre_scale_abs_mean": 0.10971628427505493, "signal/advantage_pre_scale_std": 0.19166867136955262, "signal/advantage_std": 0.9831172704696656, "signal/brier_reward/centered_abs_mean": 0.12622719258069992, "signal/brier_reward/group_std_mean": 0.16899799108505248, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17211044132709502, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012622719258069992, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05944109708070755, "signal/confidence_uniqueness_reward/group_std_mean": 0.0957409456372261, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08047307804226875, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005944109987467527, "signal/format_reward/centered_abs_mean": 0.05112847089767456, "signal/format_reward/group_std_mean": 0.08646547794342041, "signal/format_reward/group_zero_std_frac": 0.6805555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.34511254727840424, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02556423544883728, "signal/frontier_coverage_0/centered_abs_mean": 0.1582293063402176, "signal/frontier_coverage_0/group_std_mean": 0.20548607409000397, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.030993625149130823, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022626791149377825, "signal/frontier_coverage_1/centered_abs_mean": 0.1582293063402176, "signal/frontier_coverage_1/group_std_mean": 0.20548607409000397, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.030993625149130823, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022626791149377825, "signal/frontier_coverage_10/centered_abs_mean": 0.05960306078195572, "signal/frontier_coverage_10/group_std_mean": 0.07558953166007995, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011655074357986451, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008523237425833941, "signal/frontier_coverage_15/centered_abs_mean": 0.0737259179353714, "signal/frontier_coverage_15/group_std_mean": 0.09147704541683196, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.014405173435807227, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001054280623793602, "signal/frontier_coverage_20/centered_abs_mean": 0.102665276825428, "signal/frontier_coverage_20/group_std_mean": 0.1282331794500351, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020072196424007416, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014681134838610888, "signal/frontier_coverage_25/centered_abs_mean": 0.14084968566894532, "signal/frontier_coverage_25/group_std_mean": 0.1767154097557068, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027554494515061378, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002014150540344417, "signal/frontier_coverage_5/centered_abs_mean": 0.15790065228939057, "signal/frontier_coverage_5/group_std_mean": 0.2050785392522812, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030929455906152724, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022579793119803073, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32375689744949343, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3865199089050293, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.44135147929191587, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03237569145858288, "step": 670 }, { "calibration/aurc": 0.03729969376431475, "calibration/batch_distribution_entropy": 0.9389808737258891, "calibration/buffer_distribution_entropy": 0.9826168673707685, "calibration/confidence_entropy": 0.48776771095909294, "calibration/coverage@0%": 0.31871109818981125, "calibration/coverage@1%": 0.5034221627249227, "calibration/coverage@10%": 0.8701168438794713, "calibration/coverage@15%": 0.9513196642868345, "calibration/coverage@20%": 0.995822454308094, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.7422882641289732, "calibration/ece": 0.2569934348082308, "calibration/mean_confidence": 0.6003690197412586, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.016145833333333325, "completions/max_length": 4074.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 1375.4995849609375, "completions/mean_terminated_length": 1399.0270751953126, "completions/min_length": 0.0, "completions/min_terminated_length": 274.4, "epoch": 1.6215922300971237, "grad_norm": 0.002223934279754758, "learning_rate": 2.1935096153846157e-06, "loss": -0.0469, "num_tokens": 1784455926.0, "reward": 1.0157193899154664, "reward_std": 0.12572815269231796, "rewards/accuracy_reward": 0.7405381917953491, "rewards/brier_reward": 0.8042338967323304, "rewards/confidence_uniqueness_reward": 0.9352299451828003, "rewards/format_reward": 0.9837673783302308, "rewards/frontier_coverage_0": -0.008519930252805352, "rewards/frontier_coverage_1": -0.008519930252805352, "rewards/frontier_coverage_10": 0.03989522792398929, "rewards/frontier_coverage_15": 0.09447171092033387, "rewards/frontier_coverage_20": 0.16575570404529572, "rewards/frontier_coverage_25": 0.25334414541721345, "rewards/frontier_coverage_5": -0.008372036268701777, "rewards/frontier_entropy_batch_reward": -0.27930985391139984, "signal/accuracy_reward/centered_abs_mean": 0.12795681357383729, "signal/accuracy_reward/group_std_mean": 0.17028040885925294, "signal/accuracy_reward/group_zero_std_frac": 0.5138889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9479888319969177, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06397840678691864, "signal/advantage_abs_mean": 0.7458012700080872, "signal/advantage_pre_scale_abs_mean": 0.09115839749574661, "signal/advantage_pre_scale_std": 0.15917887091636657, "signal/advantage_std": 0.9829484939575195, "signal/brier_reward/centered_abs_mean": 0.1257360503077507, "signal/brier_reward/group_std_mean": 0.16337400376796724, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18875263929367064, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.0125736054033041, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03575787842273712, "signal/confidence_uniqueness_reward/group_std_mean": 0.059302129596471784, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.052689623832702634, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0035757880192250012, "signal/format_reward/centered_abs_mean": 0.025906032882630825, "signal/format_reward/group_std_mean": 0.047686302289366725, "signal/format_reward/group_zero_std_frac": 0.8083333492279052, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18549492359161376, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.012953016441315413, "signal/frontier_coverage_0/centered_abs_mean": 0.1635303020477295, "signal/frontier_coverage_0/group_std_mean": 0.2119036942720413, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035292362421751024, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00233848316129297, "signal/frontier_coverage_1/centered_abs_mean": 0.1635303020477295, "signal/frontier_coverage_1/group_std_mean": 0.2119036942720413, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035292362421751024, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00233848316129297, "signal/frontier_coverage_10/centered_abs_mean": 0.06136737838387489, "signal/frontier_coverage_10/group_std_mean": 0.07794718146324157, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013388168439269066, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008775535272434354, "signal/frontier_coverage_15/centered_abs_mean": 0.07500097304582595, "signal/frontier_coverage_15/group_std_mean": 0.09282598346471786, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016546625830233096, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010725139174610377, "signal/frontier_coverage_20/centered_abs_mean": 0.10340526103973388, "signal/frontier_coverage_20/group_std_mean": 0.1284177213907242, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02281036227941513, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001478695240803063, "signal/frontier_coverage_25/centered_abs_mean": 0.1403847485780716, "signal/frontier_coverage_25/group_std_mean": 0.17535466849803924, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030875445157289506, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020075018983334303, "signal/frontier_coverage_5/centered_abs_mean": 0.16319799721240996, "signal/frontier_coverage_5/group_std_mean": 0.21148067712783813, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035220272839069366, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002333731343969703, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31295692920684814, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3812874913215637, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.482098913192749, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03129569143056869, "step": 675 }, { "calibration/aurc": 0.14541435904658284, "calibration/batch_distribution_entropy": 0.9751293794401581, "calibration/buffer_distribution_entropy": 0.9829700635712373, "calibration/confidence_entropy": 0.4906589674602201, "calibration/coverage@0%": 0.088201334986161, "calibration/coverage@1%": 0.13090966831949433, "calibration/coverage@10%": 0.4090633628879498, "calibration/coverage@15%": 0.65926727430129, "calibration/coverage@20%": 0.7996981703743204, "calibration/coverage@25%": 0.8428572417948488, "calibration/coverage@30%": 0.8786738785658386, "calibration/coverage@5%": 0.29535824941354677, "calibration/ece": 0.16559922901748228, "calibration/mean_confidence": 0.5466112312017535, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0064236111111110935, "completions/max_length": 3937.8, "completions/max_terminated_length": 3937.8, "completions/mean_length": 1338.471533203125, "completions/mean_terminated_length": 1347.26357421875, "completions/min_length": 0.0, "completions/min_terminated_length": 309.6, "epoch": 1.6335920800989987, "grad_norm": 0.002288751769810915, "learning_rate": 2.1634615384615387e-06, "loss": -0.0171, "num_tokens": 1803013230.0, "reward": 1.0145124554634095, "reward_std": 0.1102727472782135, "rewards/accuracy_reward": 0.7199652910232544, "rewards/brier_reward": 0.8168349623680115, "rewards/confidence_uniqueness_reward": 0.9457002997398376, "rewards/format_reward": 0.9935763835906982, "rewards/frontier_coverage_0": 0.013051034219097346, "rewards/frontier_coverage_1": 0.013051034219097346, "rewards/frontier_coverage_10": 0.045426635444164275, "rewards/frontier_coverage_15": 0.0962330624461174, "rewards/frontier_coverage_20": 0.16461943387985228, "rewards/frontier_coverage_25": 0.2484707236289978, "rewards/frontier_coverage_5": 0.013126900864881464, "rewards/frontier_entropy_batch_reward": -0.27005818486213684, "signal/accuracy_reward/centered_abs_mean": 0.13260633796453475, "signal/accuracy_reward/group_std_mean": 0.1719220072031021, "signal/accuracy_reward/group_zero_std_frac": 0.522222238779068, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0306506514549256, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06630316898226737, "signal/advantage_abs_mean": 0.7687285304069519, "signal/advantage_pre_scale_abs_mean": 0.08383222371339798, "signal/advantage_pre_scale_std": 0.136709526181221, "signal/advantage_std": 0.9829292297363281, "signal/brier_reward/centered_abs_mean": 0.11948189288377761, "signal/brier_reward/group_std_mean": 0.1531725823879242, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1861775755882263, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011948189325630664, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02247364744544029, "signal/confidence_uniqueness_reward/group_std_mean": 0.03589537590742111, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0348218347877264, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002247364795766771, "signal/format_reward/centered_abs_mean": 0.010948350746184587, "signal/format_reward/group_std_mean": 0.02192477509379387, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08419957533478736, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0054741753730922936, "signal/frontier_coverage_0/centered_abs_mean": 0.17799755036830903, "signal/frontier_coverage_0/group_std_mean": 0.22672839164733888, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039601098746061325, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0025453649461269377, "signal/frontier_coverage_1/centered_abs_mean": 0.17799755036830903, "signal/frontier_coverage_1/group_std_mean": 0.22672839164733888, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039601098746061325, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0025453649461269377, "signal/frontier_coverage_10/centered_abs_mean": 0.0633821927011013, "signal/frontier_coverage_10/group_std_mean": 0.07961026728153228, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014111051522195339, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009063653764314949, "signal/frontier_coverage_15/centered_abs_mean": 0.07539696991443634, "signal/frontier_coverage_15/group_std_mean": 0.09288933426141739, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01682089865207672, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010781767079606653, "signal/frontier_coverage_20/centered_abs_mean": 0.1028669998049736, "signal/frontier_coverage_20/group_std_mean": 0.12718904614448548, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02296198531985283, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001470998045988381, "signal/frontier_coverage_25/centered_abs_mean": 0.13958117067813874, "signal/frontier_coverage_25/group_std_mean": 0.17355575263500214, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03115156516432762, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001996010704897344, "signal/frontier_coverage_5/centered_abs_mean": 0.17765924632549285, "signal/frontier_coverage_5/group_std_mean": 0.22631218731403352, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03952649161219597, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002540527284145355, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.318440192937851, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3866540253162384, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4967281222343445, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03184402026236057, "step": 680 }, { "calibration/aurc": 0.15669870698176264, "calibration/batch_distribution_entropy": 0.9750650248851596, "calibration/buffer_distribution_entropy": 0.9835866869663812, "calibration/confidence_entropy": 0.502090886950683, "calibration/coverage@0%": 0.10573188642297648, "calibration/coverage@1%": 0.11146105308964316, "calibration/coverage@10%": 0.398634682332463, "calibration/coverage@15%": 0.4460699521322889, "calibration/coverage@20%": 0.6534119342906874, "calibration/coverage@25%": 0.8571080831157529, "calibration/coverage@30%": 0.9274749782419496, "calibration/coverage@5%": 0.2270860530896432, "calibration/ece": 0.18874397584720412, "calibration/mean_confidence": 0.5418572219307551, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00269097222222221, "completions/max_length": 3918.6, "completions/max_terminated_length": 3918.6, "completions/mean_length": 1182.0064208984375, "completions/mean_terminated_length": 1185.24580078125, "completions/min_length": 55.4, "completions/min_terminated_length": 240.6, "epoch": 1.6455919301008737, "grad_norm": 0.0022894926369190216, "learning_rate": 2.1334134615384616e-06, "loss": -0.0028, "num_tokens": 1819744696.0, "reward": 1.025932240486145, "reward_std": 0.09519761502742767, "rewards/accuracy_reward": 0.7417534589767456, "rewards/brier_reward": 0.8289115786552429, "rewards/confidence_uniqueness_reward": 0.9471697688102723, "rewards/format_reward": 0.9973090291023254, "rewards/frontier_coverage_0": 0.0073368697427213195, "rewards/frontier_coverage_1": 0.0073368697427213195, "rewards/frontier_coverage_10": 0.04692419543862343, "rewards/frontier_coverage_15": 0.10556664913892747, "rewards/frontier_coverage_20": 0.1815480649471283, "rewards/frontier_coverage_25": 0.2728467047214508, "rewards/frontier_coverage_5": 0.007369892485439777, "rewards/frontier_entropy_batch_reward": -0.30200849175453187, "signal/accuracy_reward/centered_abs_mean": 0.10688476413488388, "signal/accuracy_reward/group_std_mean": 0.14036836326122284, "signal/accuracy_reward/group_zero_std_frac": 0.6027777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9464346647262574, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05344238206744194, "signal/advantage_abs_mean": 0.7758130669593811, "signal/advantage_pre_scale_abs_mean": 0.07317983582615853, "signal/advantage_pre_scale_std": 0.1202566534280777, "signal/advantage_std": 0.982705807685852, "signal/brier_reward/centered_abs_mean": 0.1028501957654953, "signal/brier_reward/group_std_mean": 0.13237460404634477, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18268101513385773, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010285019874572754, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01821411997079849, "signal/confidence_uniqueness_reward/group_std_mean": 0.025956546515226366, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03234106935560703, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001821412146091461, "signal/format_reward/centered_abs_mean": 0.004866536427289248, "signal/format_reward/group_std_mean": 0.009479801915585995, "signal/format_reward/group_zero_std_frac": 0.9611111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04194744750857353, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002433268213644624, "signal/frontier_coverage_0/centered_abs_mean": 0.15280999839305878, "signal/frontier_coverage_0/group_std_mean": 0.1967985898256302, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038797355443239215, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002185182925313711, "signal/frontier_coverage_1/centered_abs_mean": 0.15280999839305878, "signal/frontier_coverage_1/group_std_mean": 0.1967985898256302, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038797355443239215, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002185182925313711, "signal/frontier_coverage_10/centered_abs_mean": 0.05912318155169487, "signal/frontier_coverage_10/group_std_mean": 0.07436081171035766, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01506075393408537, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008454615017399192, "signal/frontier_coverage_15/centered_abs_mean": 0.0739786371588707, "signal/frontier_coverage_15/group_std_mean": 0.09121221601963043, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018956642411649228, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00105789452791214, "signal/frontier_coverage_20/centered_abs_mean": 0.10158449411392212, "signal/frontier_coverage_20/group_std_mean": 0.12596147805452346, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0260475505143404, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014526582323014735, "signal/frontier_coverage_25/centered_abs_mean": 0.1375407963991165, "signal/frontier_coverage_25/group_std_mean": 0.17120290398597718, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.035241054370999336, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019668332999572156, "signal/frontier_coverage_5/centered_abs_mean": 0.15252489149570464, "signal/frontier_coverage_5/group_std_mean": 0.19644558429718018, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038724697381258014, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002181106014177203, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32505291104316714, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39162933826446533, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5808179020881653, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03250529170036316, "step": 685 }, { "calibration/aurc": 0.1229704696194999, "calibration/batch_distribution_entropy": 0.976383856114509, "calibration/buffer_distribution_entropy": 0.9839021225946876, "calibration/confidence_entropy": 0.48749734732868155, "calibration/coverage@0%": 0.07091239850040326, "calibration/coverage@1%": 0.1573707318337366, "calibration/coverage@10%": 0.5240379894422194, "calibration/coverage@15%": 0.6932473648266875, "calibration/coverage@20%": 0.7662947364314063, "calibration/coverage@25%": 0.8341147506481847, "calibration/coverage@30%": 0.9254281978638378, "calibration/coverage@5%": 0.3409230055500116, "calibration/ece": 0.1518131361828975, "calibration/mean_confidence": 0.5588064428822697, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0006944444444444642, "completions/max_length": 3496.6, "completions/max_terminated_length": 3496.6, "completions/mean_length": 1107.756689453125, "completions/mean_terminated_length": 1108.5136474609376, "completions/min_length": 84.4, "completions/min_terminated_length": 236.2, "epoch": 1.6575917801027487, "grad_norm": 0.002715888200327754, "learning_rate": 2.103365384615385e-06, "loss": 0.0081, "num_tokens": 1835604645.0, "reward": 1.019283664226532, "reward_std": 0.10603798031806946, "rewards/accuracy_reward": 0.7253472328186035, "rewards/brier_reward": 0.8144157767295838, "rewards/confidence_uniqueness_reward": 0.9509435057640075, "rewards/format_reward": 0.9993055582046508, "rewards/frontier_coverage_0": 0.0016073930077254773, "rewards/frontier_coverage_1": 0.0016073930077254773, "rewards/frontier_coverage_10": 0.042944446206092834, "rewards/frontier_coverage_15": 0.09877827614545823, "rewards/frontier_coverage_20": 0.16919994354248047, "rewards/frontier_coverage_25": 0.2534219026565552, "rewards/frontier_coverage_5": 0.0017386081628501416, "rewards/frontier_entropy_batch_reward": -0.27719637751579285, "signal/accuracy_reward/centered_abs_mean": 0.1352647602558136, "signal/accuracy_reward/group_std_mean": 0.17324215471744536, "signal/accuracy_reward/group_zero_std_frac": 0.5277777791023255, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0748308062553407, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0676323801279068, "signal/advantage_abs_mean": 0.7841925501823426, "signal/advantage_pre_scale_abs_mean": 0.08377051204442978, "signal/advantage_pre_scale_std": 0.12926071733236313, "signal/advantage_std": 0.9828932642936706, "signal/brier_reward/centered_abs_mean": 0.11323733180761338, "signal/brier_reward/group_std_mean": 0.1463605895638466, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18019480109214783, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011323734000325204, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014994461461901665, "signal/confidence_uniqueness_reward/group_std_mean": 0.02000431716442108, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02399727888405323, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001499446202069521, "signal/format_reward/centered_abs_mean": 0.001312933990266174, "signal/format_reward/group_std_mean": 0.0030315483920276163, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.010730944946408272, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.000656466995133087, "signal/frontier_coverage_0/centered_abs_mean": 0.16635308563709258, "signal/frontier_coverage_0/group_std_mean": 0.21567732095718384, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037881956249475476, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023788490798324347, "signal/frontier_coverage_1/centered_abs_mean": 0.16635308563709258, "signal/frontier_coverage_1/group_std_mean": 0.21567732095718384, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037881956249475476, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023788490798324347, "signal/frontier_coverage_10/centered_abs_mean": 0.062270589917898175, "signal/frontier_coverage_10/group_std_mean": 0.07897855192422867, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014190655015408993, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008904694463126361, "signal/frontier_coverage_15/centered_abs_mean": 0.07686868906021119, "signal/frontier_coverage_15/group_std_mean": 0.09543234705924988, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017530930414795875, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010992222232744098, "signal/frontier_coverage_20/centered_abs_mean": 0.10772657692432404, "signal/frontier_coverage_20/group_std_mean": 0.133695587515831, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024576536566019058, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015404900535941125, "signal/frontier_coverage_25/centered_abs_mean": 0.14818452894687653, "signal/frontier_coverage_25/group_std_mean": 0.1841533213853836, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03380677923560142, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002119038766250014, "signal/frontier_coverage_5/centered_abs_mean": 0.16605048775672912, "signal/frontier_coverage_5/group_std_mean": 0.21530235409736634, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03781315460801125, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002374521875753999, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3271877884864807, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3948762595653534, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5215847194194794, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03271878063678742, "step": 690 }, { "calibration/aurc": 0.12016505696814503, "calibration/batch_distribution_entropy": 0.9630713458254473, "calibration/buffer_distribution_entropy": 0.9853825829045704, "calibration/confidence_entropy": 0.4604225348032044, "calibration/coverage@0%": 0.10989583333333333, "calibration/coverage@1%": 0.2588541666666667, "calibration/coverage@10%": 0.5395833333333333, "calibration/coverage@15%": 0.6171874999999999, "calibration/coverage@20%": 0.7697916666666667, "calibration/coverage@25%": 0.8526041666666668, "calibration/coverage@30%": 0.9010416666666667, "calibration/coverage@5%": 0.4583333333333333, "calibration/ece": 0.1897616322916667, "calibration/mean_confidence": 0.5404426177083334, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0005208333333333482, "completions/max_length": 3440.2, "completions/max_terminated_length": 3440.2, "completions/mean_length": 1030.8426391601563, "completions/mean_terminated_length": 1031.3654174804688, "completions/min_length": 136.4, "completions/min_terminated_length": 261.4, "epoch": 1.6695916301046236, "grad_norm": 0.0026529114693403244, "learning_rate": 2.073317307692308e-06, "loss": 0.0088, "num_tokens": 1850554288.0, "reward": 1.0253738403320312, "reward_std": 0.10471922159194946, "rewards/accuracy_reward": 0.7399305582046509, "rewards/brier_reward": 0.8257181644439697, "rewards/confidence_uniqueness_reward": 0.9484269022941589, "rewards/format_reward": 0.9994791626930237, "rewards/frontier_coverage_0": 0.005620070081204176, "rewards/frontier_coverage_1": 0.005620070081204176, "rewards/frontier_coverage_10": 0.04614498615264893, "rewards/frontier_coverage_15": 0.10905924439430237, "rewards/frontier_coverage_20": 0.18669271767139434, "rewards/frontier_coverage_25": 0.2785437643527985, "rewards/frontier_coverage_5": 0.0057056773453950885, "rewards/frontier_entropy_batch_reward": -0.3086018800735474, "signal/accuracy_reward/centered_abs_mean": 0.13557942658662797, "signal/accuracy_reward/group_std_mean": 0.18026112020015717, "signal/accuracy_reward/group_zero_std_frac": 0.48055556416511536, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0554683685302735, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06778971329331399, "signal/advantage_abs_mean": 0.7678684234619141, "signal/advantage_pre_scale_abs_mean": 0.08026924282312393, "signal/advantage_pre_scale_std": 0.1252484291791916, "signal/advantage_std": 0.982914924621582, "signal/brier_reward/centered_abs_mean": 0.10984794348478318, "signal/brier_reward/group_std_mean": 0.14237151443958282, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17236720025539398, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010984793864190578, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01557443682104349, "signal/confidence_uniqueness_reward/group_std_mean": 0.020670870319008827, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024543348327279092, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015574437333270908, "signal/format_reward/centered_abs_mean": 0.0009982638759538532, "signal/format_reward/group_std_mean": 0.0026473373174667357, "signal/format_reward/group_zero_std_frac": 0.9861111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.007815391756594181, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004991319379769266, "signal/frontier_coverage_0/centered_abs_mean": 0.1692986935377121, "signal/frontier_coverage_0/group_std_mean": 0.21854256391525267, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03790202885866165, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024209713097661733, "signal/frontier_coverage_1/centered_abs_mean": 0.1692986935377121, "signal/frontier_coverage_1/group_std_mean": 0.21854256391525267, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03790202885866165, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024209713097661733, "signal/frontier_coverage_10/centered_abs_mean": 0.06352206021547317, "signal/frontier_coverage_10/group_std_mean": 0.08006793260574341, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01428398210555315, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000908365473151207, "signal/frontier_coverage_15/centered_abs_mean": 0.07578349262475967, "signal/frontier_coverage_15/group_std_mean": 0.09459168761968613, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017029393836855887, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010837039677426218, "signal/frontier_coverage_20/centered_abs_mean": 0.10363190919160843, "signal/frontier_coverage_20/group_std_mean": 0.13097960501909256, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023263034224510194, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014819363364949822, "signal/frontier_coverage_25/centered_abs_mean": 0.14058729410171508, "signal/frontier_coverage_25/group_std_mean": 0.17922786474227906, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03153381682932377, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00201039828825742, "signal/frontier_coverage_5/centered_abs_mean": 0.16897266507148742, "signal/frontier_coverage_5/group_std_mean": 0.21812840402126313, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03782900050282478, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002416309108957648, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3316554367542267, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3980835318565369, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5211562156677246, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03316554352641106, "step": 695 }, { "calibration/aurc": 0.13793057252636853, "calibration/batch_distribution_entropy": 0.9191704771321465, "calibration/buffer_distribution_entropy": 0.9850459534761482, "calibration/confidence_entropy": 0.48348108765589587, "calibration/coverage@0%": 0.03594022687609075, "calibration/coverage@1%": 0.11094022687609073, "calibration/coverage@10%": 0.36704842931937176, "calibration/coverage@15%": 0.5469213568935428, "calibration/coverage@20%": 0.8864119764397905, "calibration/coverage@25%": 0.9385416666666668, "calibration/coverage@30%": 0.96875, "calibration/coverage@5%": 0.21469240837696332, "calibration/ece": 0.15457895792702878, "calibration/mean_confidence": 0.6365902838432592, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000868055555555558, "completions/max_length": 3035.0, "completions/max_terminated_length": 3035.0, "completions/mean_length": 993.9149169921875, "completions/mean_terminated_length": 994.7564208984375, "completions/min_length": 51.6, "completions/min_terminated_length": 224.6, "epoch": 1.6815914801064986, "grad_norm": 0.0027931861113756895, "learning_rate": 2.043269230769231e-06, "loss": 0.0022, "num_tokens": 1865124220.0, "reward": 1.0046656370162963, "reward_std": 0.09352846145629883, "rewards/accuracy_reward": 0.706249988079071, "rewards/brier_reward": 0.8287553548812866, "rewards/confidence_uniqueness_reward": 0.9467250108718872, "rewards/format_reward": 0.9991319417953491, "rewards/frontier_coverage_0": 0.026872091740369797, "rewards/frontier_coverage_1": 0.026872091740369797, "rewards/frontier_coverage_10": 0.04779320433735847, "rewards/frontier_coverage_15": 0.0996496319770813, "rewards/frontier_coverage_20": 0.16897362768650054, "rewards/frontier_coverage_25": 0.253493994474411, "rewards/frontier_coverage_5": 0.026916111633181573, "rewards/frontier_entropy_batch_reward": -0.34876567125320435, "signal/accuracy_reward/centered_abs_mean": 0.0991970494389534, "signal/accuracy_reward/group_std_mean": 0.1393287718296051, "signal/accuracy_reward/group_zero_std_frac": 0.5694444417953491, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.842095923423767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0495985247194767, "signal/advantage_abs_mean": 0.7627769827842712, "signal/advantage_pre_scale_abs_mean": 0.07055802345275879, "signal/advantage_pre_scale_std": 0.11469898372888565, "signal/advantage_std": 0.9827576041221618, "signal/brier_reward/centered_abs_mean": 0.1017798662185669, "signal/brier_reward/group_std_mean": 0.13196168690919877, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17585197389125823, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010177987068891526, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015178951062262058, "signal/confidence_uniqueness_reward/group_std_mean": 0.020427386462688445, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026487966254353523, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015178951434791088, "signal/format_reward/centered_abs_mean": 0.0015733506763353944, "signal/format_reward/group_std_mean": 0.0034799596294760706, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014351568464189769, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0007866753381676972, "signal/frontier_coverage_0/centered_abs_mean": 0.1307838648557663, "signal/frontier_coverage_0/group_std_mean": 0.173773917555809, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032213568314909936, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001870209281332791, "signal/frontier_coverage_1/centered_abs_mean": 0.1307838648557663, "signal/frontier_coverage_1/group_std_mean": 0.173773917555809, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032213568314909936, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001870209281332791, "signal/frontier_coverage_10/centered_abs_mean": 0.055519319325685504, "signal/frontier_coverage_10/group_std_mean": 0.07080635875463485, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013711910881102084, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007939262664876878, "signal/frontier_coverage_15/centered_abs_mean": 0.07911764830350876, "signal/frontier_coverage_15/group_std_mean": 0.09775821417570114, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019597242772579192, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001131382375024259, "signal/frontier_coverage_20/centered_abs_mean": 0.1124587595462799, "signal/frontier_coverage_20/group_std_mean": 0.13922121226787568, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027819440886378288, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016081602778285741, "signal/frontier_coverage_25/centered_abs_mean": 0.1522460699081421, "signal/frontier_coverage_25/group_std_mean": 0.1895580768585205, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037604338675737384, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021771186031401156, "signal/frontier_coverage_5/centered_abs_mean": 0.13061472475528718, "signal/frontier_coverage_5/group_std_mean": 0.17355478703975677, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0321720227599144, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018677905201911927, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.330916690826416, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39551963210105895, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5743350386619568, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033091668784618375, "step": 700 }, { "epoch": 1.6815914801064986, "eval_calibration/aurc": 0.18089802504255273, "eval_calibration/batch_distribution_entropy": 0.9028164891686236, "eval_calibration/buffer_distribution_entropy": 0.9841904792966663, "eval_calibration/confidence_entropy": 0.47011778535507714, "eval_calibration/coverage@0%": 0.15104166666666666, "eval_calibration/coverage@1%": 0.15104166666666666, "eval_calibration/coverage@10%": 0.390625, "eval_calibration/coverage@15%": 0.640625, "eval_calibration/coverage@20%": 0.8020833333333334, "eval_calibration/coverage@25%": 0.8541666666666666, "eval_calibration/coverage@30%": 0.90625, "eval_calibration/coverage@5%": 0.15104166666666666, "eval_calibration/ece": 0.20879, "eval_calibration/mean_confidence": 0.6107004166666666, "eval_completions/clipped_ratio": 0.0, "eval_completions/max_length": 2728.0, "eval_completions/max_terminated_length": 2728.0, "eval_completions/mean_length": 1002.8826700846354, "eval_completions/mean_terminated_length": 1002.8826700846354, "eval_completions/min_length": 291.3333333333333, "eval_completions/min_terminated_length": 291.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 1865124220.0, "eval_reward": 0.9274651606877645, "eval_reward_std": 0.23160785188277563, "eval_rewards/accuracy_reward": 0.6901041666666666, "eval_rewards/brier_reward": 0.8312720060348511, "eval_rewards/confidence_uniqueness_reward": 0.8948567608992258, "eval_rewards/format_reward": 1.0, "eval_rewards/frontier_coverage_0": 0.0433860938064754, "eval_rewards/frontier_coverage_1": 0.0433860938064754, "eval_rewards/frontier_coverage_10": 0.04865478724241257, "eval_rewards/frontier_coverage_15": 0.09748644630114238, "eval_rewards/frontier_coverage_20": 0.16351032753785452, "eval_rewards/frontier_coverage_25": 0.24551946173111597, "eval_rewards/frontier_coverage_5": 0.04338224340851108, "eval_rewards/frontier_entropy_batch_reward": -1.0, "eval_runtime": 158.4425, "eval_samples_per_second": 6.311, "eval_signal/accuracy_reward/centered_abs_mean": 0.4116210887829463, "eval_signal/accuracy_reward/group_std_mean": 0.4593142320712407, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.892851193745931, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20581054439147314, "eval_signal/advantage_abs_mean": 0.8790315886338552, "eval_signal/advantage_pre_scale_abs_mean": 0.20490500579277673, "eval_signal/advantage_pre_scale_std": 0.22919744749863943, "eval_signal/advantage_std": 0.9863952895005544, "eval_signal/brier_reward/centered_abs_mean": 0.1591823771595955, "eval_signal/brier_reward/group_std_mean": 0.21574609478314719, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06901257298886776, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01591823762282729, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0410291887819767, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.04890784186621507, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01784918162350853, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00410291882387052, "eval_signal/format_reward/centered_abs_mean": 0.0, "eval_signal/format_reward/group_std_mean": 0.0, "eval_signal/format_reward/group_zero_std_frac": 1.0, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.25617842624584836, "eval_signal/frontier_coverage_0/group_std_mean": 0.35960617661476135, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01589485149209698, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036633514488736787, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.25617842624584836, "eval_signal/frontier_coverage_1/group_std_mean": 0.35960617661476135, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01589485149209698, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036633514488736787, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.07833195229371388, "eval_signal/frontier_coverage_10/group_std_mean": 0.10744242370128632, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004864773480221629, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011201469460502267, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.11549535890420277, "eval_signal/frontier_coverage_15/group_std_mean": 0.14778297146161398, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007180764805525541, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016515836274872224, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.19812731196482977, "eval_signal/frontier_coverage_20/group_std_mean": 0.2471755420168241, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012313599543025097, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002833220448034505, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.29754012326399487, "eval_signal/frontier_coverage_25/group_std_mean": 0.3664591312408447, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01848344939450423, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004254823783412576, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.25584504504998523, "eval_signal/frontier_coverage_5/group_std_mean": 0.35919003188610077, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01587420531238119, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0036585842026397586, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "eval_steps_per_second": 0.038, "step": 700 }, { "epoch": 1.6815914801064986, "step": 700, "train_probe_calibration/aurc": 0.07127535899437333, "train_probe_calibration/batch_distribution_entropy": 0.9124863581279891, "train_probe_calibration/buffer_distribution_entropy": 0.9839733115888903, "train_probe_calibration/confidence_entropy": 0.5049382609896232, "train_probe_calibration/coverage@0%": 0.4270833333333333, "train_probe_calibration/coverage@1%": 0.4270833333333333, "train_probe_calibration/coverage@10%": 0.7552083333333334, "train_probe_calibration/coverage@15%": 0.8489583333333334, "train_probe_calibration/coverage@20%": 0.9427083333333334, "train_probe_calibration/coverage@25%": 0.9791666666666666, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.515625, "train_probe_calibration/ece": 0.22284895833333332, "train_probe_calibration/mean_confidence": 0.6038437499999999, "train_probe_completions/clipped_ratio": 0.0, "train_probe_completions/max_length": 2894.3333333333335, "train_probe_completions/max_terminated_length": 2894.3333333333335, "train_probe_completions/mean_length": 1015.2335306803385, "train_probe_completions/mean_terminated_length": 1015.2335306803385, "train_probe_completions/min_length": 299.6666666666667, "train_probe_completions/min_terminated_length": 299.6666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 1865124220.0, "train_probe_reward": 0.96530615290006, "train_probe_reward_std": 0.2089141458272934, "train_probe_rewards/accuracy_reward": 0.7664930621782938, "train_probe_rewards/brier_reward": 0.8375353117783865, "train_probe_rewards/confidence_uniqueness_reward": 0.895941843589147, "train_probe_rewards/format_reward": 1.0, "train_probe_rewards/frontier_coverage_0": -0.006343296496197581, "train_probe_rewards/frontier_coverage_1": -0.006343296496197581, "train_probe_rewards/frontier_coverage_10": 0.04194001046319803, "train_probe_rewards/frontier_coverage_15": 0.10766408095757167, "train_probe_rewards/frontier_coverage_20": 0.18949769685665765, "train_probe_rewards/frontier_coverage_25": 0.28906770547231037, "train_probe_rewards/frontier_coverage_5": -0.0062605949739615125, "train_probe_rewards/frontier_entropy_batch_reward": -1.0, "train_probe_runtime": 150.7247, "train_probe_samples_per_second": 6.635, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3511827240387599, "train_probe_signal/accuracy_reward/group_std_mean": 0.4241461455821991, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8484959204991659, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17559136201937994, "train_probe_signal/advantage_abs_mean": 0.8118196030457815, "train_probe_signal/advantage_pre_scale_abs_mean": 0.17097164442141852, "train_probe_signal/advantage_pre_scale_std": 0.20714367926120758, "train_probe_signal/advantage_std": 0.9863471786181132, "train_probe_signal/brier_reward/centered_abs_mean": 0.14898951599995294, "train_probe_signal/brier_reward/group_std_mean": 0.20260730385780334, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07194508115450542, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.014898952251921097, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.042310927684108414, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.04979841659466425, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020431222083667915, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042310926752785845, "train_probe_signal/format_reward/centered_abs_mean": 0.0, "train_probe_signal/format_reward/group_std_mean": 0.0, "train_probe_signal/format_reward/group_zero_std_frac": 1.0, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.23911839226881662, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.3518788516521454, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016536445822566748, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003419393013852338, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.23911839226881662, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.3518788516521454, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016536445822566748, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003419393013852338, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07297215610742569, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.1017376904686292, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0050443368187795086, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010435017951143284, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.10844651361306508, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.1344559801121553, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007490781756738822, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00155078514944762, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.18234311292568842, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.2210288643836975, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012593142222613096, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002607506583444774, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2681734710931778, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.3245113790035248, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01852063648402691, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00383488069443653, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.23879685004552206, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.35146549840768176, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016514215618371964, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034147949190810323, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 1.0, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0, "train_probe_steps_per_second": 0.04 }, { "calibration/aurc": 0.11227090677110904, "calibration/batch_distribution_entropy": 0.948779230760648, "calibration/buffer_distribution_entropy": 0.9835474650215609, "calibration/confidence_entropy": 0.49129050024545345, "calibration/coverage@0%": 0.040625, "calibration/coverage@1%": 0.040625, "calibration/coverage@10%": 0.5630208333333333, "calibration/coverage@15%": 0.7036458333333334, "calibration/coverage@20%": 0.8458333333333332, "calibration/coverage@25%": 0.909375, "calibration/coverage@30%": 0.9583333333333333, "calibration/coverage@5%": 0.3458333333333333, "calibration/ece": 0.18081584374999998, "calibration/mean_confidence": 0.6116592604166666, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0004340277777777901, "completions/max_length": 3177.6, "completions/max_terminated_length": 3177.6, "completions/mean_length": 986.2343017578125, "completions/mean_terminated_length": 986.6416015625, "completions/min_length": 158.2, "completions/min_terminated_length": 269.6, "epoch": 1.6935913301083736, "grad_norm": 0.0028598185162991285, "learning_rate": 2.013221153846154e-06, "loss": 0.0069, "num_tokens": 1879571655.0, "reward": 1.019974374771118, "reward_std": 0.09644923657178879, "rewards/accuracy_reward": 0.7236979126930236, "rewards/brier_reward": 0.8357795834541321, "rewards/confidence_uniqueness_reward": 0.9502484321594238, "rewards/format_reward": 0.9995659708976745, "rewards/frontier_coverage_0": 0.02306669168174267, "rewards/frontier_coverage_1": 0.02306669168174267, "rewards/frontier_coverage_10": 0.04798247441649437, "rewards/frontier_coverage_15": 0.10166206508874893, "rewards/frontier_coverage_20": 0.17447640299797057, "rewards/frontier_coverage_25": 0.2648200333118439, "rewards/frontier_coverage_5": 0.02308344580233097, "rewards/frontier_entropy_batch_reward": -0.2967200607061386, "signal/accuracy_reward/centered_abs_mean": 0.11241862177848816, "signal/accuracy_reward/group_std_mean": 0.15345828533172606, "signal/accuracy_reward/group_zero_std_frac": 0.5472222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9400637745857239, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05620931088924408, "signal/advantage_abs_mean": 0.763127076625824, "signal/advantage_pre_scale_abs_mean": 0.0733158528804779, "signal/advantage_pre_scale_std": 0.11780442744493484, "signal/advantage_std": 0.9828173637390136, "signal/brier_reward/centered_abs_mean": 0.09998511821031571, "signal/brier_reward/group_std_mean": 0.130735120177269, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16749710142612456, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009998511895537377, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.014156256802380085, "signal/confidence_uniqueness_reward/group_std_mean": 0.018865460343658925, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02363467663526535, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014156257035210728, "signal/format_reward/centered_abs_mean": 0.0008300781133584678, "signal/format_reward/group_std_mean": 0.0021562909707427023, "signal/format_reward/group_zero_std_frac": 0.9888888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.006732623372226953, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0004150390566792339, "signal/frontier_coverage_0/centered_abs_mean": 0.1440996915102005, "signal/frontier_coverage_0/group_std_mean": 0.19001898765563965, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0345321387052536, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020606255857273936, "signal/frontier_coverage_1/centered_abs_mean": 0.1440996915102005, "signal/frontier_coverage_1/group_std_mean": 0.19001898765563965, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0345321387052536, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020606255857273936, "signal/frontier_coverage_10/centered_abs_mean": 0.056343245506286624, "signal/frontier_coverage_10/group_std_mean": 0.07241087406873703, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013484322652220725, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008057083818130195, "signal/frontier_coverage_15/centered_abs_mean": 0.07188424617052078, "signal/frontier_coverage_15/group_std_mean": 0.08956207633018494, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017177759483456612, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001027944718953222, "signal/frontier_coverage_20/centered_abs_mean": 0.10164368897676468, "signal/frontier_coverage_20/group_std_mean": 0.127097025513649, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024291865527629852, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001453504804521799, "signal/frontier_coverage_25/centered_abs_mean": 0.13992716670036315, "signal/frontier_coverage_25/group_std_mean": 0.17583813071250914, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.033450279384851456, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020009585423395038, "signal/frontier_coverage_5/centered_abs_mean": 0.1439125806093216, "signal/frontier_coverage_5/group_std_mean": 0.18978277444839478, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03448736071586609, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020579498959705234, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32811395525932313, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3939893305301666, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5487881243228913, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032811397686600685, "step": 705 }, { "calibration/aurc": 0.11177033556535712, "calibration/batch_distribution_entropy": 0.976128231345325, "calibration/buffer_distribution_entropy": 0.9831304894900681, "calibration/confidence_entropy": 0.4772357399211125, "calibration/coverage@0%": 0.10104166666666667, "calibration/coverage@1%": 0.2026041666666667, "calibration/coverage@10%": 0.6203125, "calibration/coverage@15%": 0.7109375, "calibration/coverage@20%": 0.7885416666666667, "calibration/coverage@25%": 0.8604166666666668, "calibration/coverage@30%": 0.909375, "calibration/coverage@5%": 0.4421875, "calibration/ece": 0.1605678541666667, "calibration/mean_confidence": 0.518036125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013020833333333482, "completions/max_length": 3288.6, "completions/max_terminated_length": 3288.6, "completions/mean_length": 952.0667602539063, "completions/mean_terminated_length": 953.1977416992188, "completions/min_length": 123.0, "completions/min_terminated_length": 214.0, "epoch": 1.7055911801102486, "grad_norm": 0.0027350601740181446, "learning_rate": 1.983173076923077e-06, "loss": 0.0059, "num_tokens": 1893672808.0, "reward": 1.0128086686134339, "reward_std": 0.09612514525651931, "rewards/accuracy_reward": 0.7134548664093018, "rewards/brier_reward": 0.8288593173027039, "rewards/confidence_uniqueness_reward": 0.9479322075843811, "rewards/format_reward": 0.9986979246139527, "rewards/frontier_coverage_0": 0.032473142445087436, "rewards/frontier_coverage_1": 0.032473142445087436, "rewards/frontier_coverage_10": 0.05002719163894653, "rewards/frontier_coverage_15": 0.09773223251104354, "rewards/frontier_coverage_20": 0.1664465069770813, "rewards/frontier_coverage_25": 0.25369060337543486, "rewards/frontier_coverage_5": 0.032423215731978414, "rewards/frontier_entropy_batch_reward": -0.3046021282672882, "signal/accuracy_reward/centered_abs_mean": 0.11392686665058135, "signal/accuracy_reward/group_std_mean": 0.15370176434516908, "signal/accuracy_reward/group_zero_std_frac": 0.5527777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9989817976951599, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05696343332529068, "signal/advantage_abs_mean": 0.7665924191474914, "signal/advantage_pre_scale_abs_mean": 0.07367298156023025, "signal/advantage_pre_scale_std": 0.11973680555820465, "signal/advantage_std": 0.9827433586120605, "signal/brier_reward/centered_abs_mean": 0.10304120779037476, "signal/brier_reward/group_std_mean": 0.13187731206417083, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1804224044084549, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010304121114313603, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015719205886125565, "signal/confidence_uniqueness_reward/group_std_mean": 0.020367484539747238, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0275027796626091, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015719205373898148, "signal/format_reward/centered_abs_mean": 0.00201280377805233, "signal/format_reward/group_std_mean": 0.0031654864549636843, "signal/format_reward/group_zero_std_frac": 0.9888888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.017516496032476424, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001006401889026165, "signal/frontier_coverage_0/centered_abs_mean": 0.15068837106227875, "signal/frontier_coverage_0/group_std_mean": 0.1956734299659729, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0377209234982729, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021548437187448146, "signal/frontier_coverage_1/centered_abs_mean": 0.15068837106227875, "signal/frontier_coverage_1/group_std_mean": 0.1956734299659729, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0377209234982729, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021548437187448146, "signal/frontier_coverage_10/centered_abs_mean": 0.0584891103208065, "signal/frontier_coverage_10/group_std_mean": 0.07460076361894608, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014630392752587795, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008363942615687847, "signal/frontier_coverage_15/centered_abs_mean": 0.0715473860502243, "signal/frontier_coverage_15/group_std_mean": 0.08882242441177368, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017906750738620757, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001023127674125135, "signal/frontier_coverage_20/centered_abs_mean": 0.09926576763391495, "signal/frontier_coverage_20/group_std_mean": 0.12329905033111573, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024845069274306297, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014195004710927605, "signal/frontier_coverage_25/centered_abs_mean": 0.13574471473693847, "signal/frontier_coverage_25/group_std_mean": 0.16967822313308717, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03398062214255333, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019411493558436632, "signal/frontier_coverage_5/centered_abs_mean": 0.15050061643123627, "signal/frontier_coverage_5/group_std_mean": 0.19543037116527556, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03767400272190571, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021521587623283267, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31716119647026064, "signal/frontier_entropy_batch_reward/group_std_mean": 0.386073637008667, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5541856288909912, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03171611912548542, "step": 710 }, { "calibration/aurc": 0.16409598680310306, "calibration/batch_distribution_entropy": 0.9560436108340845, "calibration/buffer_distribution_entropy": 0.9837300379537656, "calibration/confidence_entropy": 0.4912980836544441, "calibration/coverage@0%": 0.1578125, "calibration/coverage@1%": 0.2390625, "calibration/coverage@10%": 0.4213541666666667, "calibration/coverage@15%": 0.4598958333333334, "calibration/coverage@20%": 0.6744791666666667, "calibration/coverage@25%": 0.7630208333333333, "calibration/coverage@30%": 0.83125, "calibration/coverage@5%": 0.3338541666666667, "calibration/ece": 0.19649028546559952, "calibration/mean_confidence": 0.5740259916177338, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00017361111111111605, "completions/max_length": 3283.2, "completions/max_terminated_length": 3283.2, "completions/mean_length": 951.1273559570312, "completions/mean_terminated_length": 951.2944213867188, "completions/min_length": 156.4, "completions/min_terminated_length": 273.4, "epoch": 1.7175910301121236, "grad_norm": 0.002813108032569289, "learning_rate": 1.953125e-06, "loss": 0.0067, "num_tokens": 1907777379.0, "reward": 1.0050580382347107, "reward_std": 0.09790399223566056, "rewards/accuracy_reward": 0.7006076455116272, "rewards/brier_reward": 0.8105961203575134, "rewards/confidence_uniqueness_reward": 0.9501657247543335, "rewards/format_reward": 0.9998263835906982, "rewards/frontier_coverage_0": 0.017787472996860742, "rewards/frontier_coverage_1": 0.017787472996860742, "rewards/frontier_coverage_10": 0.039942527562379836, "rewards/frontier_coverage_15": 0.08797252029180527, "rewards/frontier_coverage_20": 0.15163930654525756, "rewards/frontier_coverage_25": 0.22993890941143036, "rewards/frontier_coverage_5": 0.01784335859119892, "rewards/frontier_entropy_batch_reward": -0.2928480267524719, "signal/accuracy_reward/centered_abs_mean": 0.1241156667470932, "signal/accuracy_reward/group_std_mean": 0.16047287881374359, "signal/accuracy_reward/group_zero_std_frac": 0.5527777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0350728273391723, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0620578333735466, "signal/advantage_abs_mean": 0.7821515083312989, "signal/advantage_pre_scale_abs_mean": 0.07757937014102936, "signal/advantage_pre_scale_std": 0.1194337010383606, "signal/advantage_std": 0.9828120470046997, "signal/brier_reward/centered_abs_mean": 0.10871105641126633, "signal/brier_reward/group_std_mean": 0.13986618518829347, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18278415501117706, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010871105827391148, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01440898347645998, "signal/confidence_uniqueness_reward/group_std_mean": 0.018457892164587975, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.024234963953495024, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014408984687179327, "signal/format_reward/centered_abs_mean": 0.0003363715251907706, "signal/format_reward/group_std_mean": 0.0009820926934480667, "signal/format_reward/group_zero_std_frac": 0.9944444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.002808227576315403, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0001681857625953853, "signal/frontier_coverage_0/centered_abs_mean": 0.168141171336174, "signal/frontier_coverage_0/group_std_mean": 0.21634862720966339, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04029642269015312, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002404418867081404, "signal/frontier_coverage_1/centered_abs_mean": 0.168141171336174, "signal/frontier_coverage_1/group_std_mean": 0.21634862720966339, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04029642269015312, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002404418867081404, "signal/frontier_coverage_10/centered_abs_mean": 0.060566478222608564, "signal/frontier_coverage_10/group_std_mean": 0.07745107561349869, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014549786597490311, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008661005995236337, "signal/frontier_coverage_15/centered_abs_mean": 0.06986679509282112, "signal/frontier_coverage_15/group_std_mean": 0.0866427794098854, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016798367351293565, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009990951512008905, "signal/frontier_coverage_20/centered_abs_mean": 0.09684419780969619, "signal/frontier_coverage_20/group_std_mean": 0.12026553452014924, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023264965415000914, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013848720118403435, "signal/frontier_coverage_25/centered_abs_mean": 0.13374279588460922, "signal/frontier_coverage_25/group_std_mean": 0.1665105402469635, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03211365006864071, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019125219201669096, "signal/frontier_coverage_5/centered_abs_mean": 0.1679401069879532, "signal/frontier_coverage_5/group_std_mean": 0.21610071659088134, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.04024786874651909, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002401543501764536, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32422704696655275, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3925470232963562, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5443297028541565, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03242270387709141, "step": 715 }, { "calibration/aurc": 0.11311558304503566, "calibration/batch_distribution_entropy": 0.9469167067730021, "calibration/buffer_distribution_entropy": 0.9843809426976993, "calibration/confidence_entropy": 0.4714342502997916, "calibration/coverage@0%": 0.08125, "calibration/coverage@1%": 0.15364583333333331, "calibration/coverage@10%": 0.59375, "calibration/coverage@15%": 0.6770833333333334, "calibration/coverage@20%": 0.7578125, "calibration/coverage@25%": 0.8171875, "calibration/coverage@30%": 0.9010416666666667, "calibration/coverage@5%": 0.5281250000000001, "calibration/ece": 0.15241121949599107, "calibration/mean_confidence": 0.6112474159206757, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0013020833333333482, "completions/max_length": 3388.2, "completions/max_terminated_length": 3388.2, "completions/mean_length": 1030.08037109375, "completions/mean_terminated_length": 1031.532958984375, "completions/min_length": 150.0, "completions/min_terminated_length": 324.8, "epoch": 1.7295908801139985, "grad_norm": 0.0027848011814057827, "learning_rate": 1.9230769230769234e-06, "loss": 0.0091, "num_tokens": 1922750273.0, "reward": 1.0154060482978822, "reward_std": 0.10402875542640685, "rewards/accuracy_reward": 0.715625, "rewards/brier_reward": 0.8296850442886352, "rewards/confidence_uniqueness_reward": 0.9488834977149964, "rewards/format_reward": 0.9986979126930237, "rewards/frontier_coverage_0": 0.026306459889747204, "rewards/frontier_coverage_1": 0.026306459889747204, "rewards/frontier_coverage_10": 0.04776106104254722, "rewards/frontier_coverage_15": 0.10400652289390563, "rewards/frontier_coverage_20": 0.1774923324584961, "rewards/frontier_coverage_25": 0.2654254615306854, "rewards/frontier_coverage_5": 0.02636495413025841, "rewards/frontier_entropy_batch_reward": -0.29245676696300504, "signal/accuracy_reward/centered_abs_mean": 0.12931857705116273, "signal/accuracy_reward/group_std_mean": 0.17324694395065307, "signal/accuracy_reward/group_zero_std_frac": 0.4944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.016154146194458, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06465928852558137, "signal/advantage_abs_mean": 0.768020224571228, "signal/advantage_pre_scale_abs_mean": 0.08030048310756684, "signal/advantage_pre_scale_std": 0.1255600705742836, "signal/advantage_std": 0.9828881740570068, "signal/brier_reward/centered_abs_mean": 0.1074549213051796, "signal/brier_reward/group_std_mean": 0.13885989040136337, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1708405613899231, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01074549201875925, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015975476428866386, "signal/confidence_uniqueness_reward/group_std_mean": 0.0203463114798069, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02538231648504734, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015975477173924447, "signal/format_reward/centered_abs_mean": 0.0019151475164107979, "signal/format_reward/group_std_mean": 0.0030056854709982874, "signal/format_reward/group_zero_std_frac": 0.9888888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.01378869116306305, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0009575737582053989, "signal/frontier_coverage_0/centered_abs_mean": 0.15936702787876128, "signal/frontier_coverage_0/group_std_mean": 0.20675169229507445, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03624581061303615, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002278948575258255, "signal/frontier_coverage_1/centered_abs_mean": 0.15936702787876128, "signal/frontier_coverage_1/group_std_mean": 0.20675169229507445, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03624581061303615, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002278948575258255, "signal/frontier_coverage_10/centered_abs_mean": 0.06151966378092766, "signal/frontier_coverage_10/group_std_mean": 0.07832264006137848, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014088746346533298, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008797311922535301, "signal/frontier_coverage_15/centered_abs_mean": 0.07503360658884048, "signal/frontier_coverage_15/group_std_mean": 0.09373433589935302, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0172480970621109, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010729805566370488, "signal/frontier_coverage_20/centered_abs_mean": 0.10515500009059905, "signal/frontier_coverage_20/group_std_mean": 0.13239262700080873, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02413479909300804, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001503716385923326, "signal/frontier_coverage_25/centered_abs_mean": 0.14459011554718018, "signal/frontier_coverage_25/group_std_mean": 0.1828522264957428, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.033116637542843816, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020676386076956986, "signal/frontier_coverage_5/centered_abs_mean": 0.15917536318302156, "signal/frontier_coverage_5/group_std_mean": 0.20650528967380524, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03620238043367863, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002276207786053419, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33016577959060667, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3960925698280334, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5278231203556061, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0330165795981884, "step": 720 }, { "calibration/aurc": 0.08872587034216756, "calibration/batch_distribution_entropy": 0.9598728659210309, "calibration/buffer_distribution_entropy": 0.9835396752206689, "calibration/confidence_entropy": 0.4633211907048679, "calibration/coverage@0%": 0.09427083333333333, "calibration/coverage@1%": 0.2114583333333333, "calibration/coverage@10%": 0.6203125, "calibration/coverage@15%": 0.8171875, "calibration/coverage@20%": 0.9109375, "calibration/coverage@25%": 0.9598958333333332, "calibration/coverage@30%": 0.996875, "calibration/coverage@5%": 0.415625, "calibration/ece": 0.18747993802083335, "calibration/mean_confidence": 0.5860404057291666, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0009548611111111161, "completions/max_length": 3613.4, "completions/max_terminated_length": 3613.4, "completions/mean_length": 1067.2394775390626, "completions/mean_terminated_length": 1068.278662109375, "completions/min_length": 37.0, "completions/min_terminated_length": 250.6, "epoch": 1.7415907301158735, "grad_norm": 0.002794325351715088, "learning_rate": 1.8930288461538463e-06, "loss": 0.006, "num_tokens": 1938145768.0, "reward": 1.0198950052261353, "reward_std": 0.1042319193482399, "rewards/accuracy_reward": 0.7254340291023255, "rewards/brier_reward": 0.8326017618179321, "rewards/confidence_uniqueness_reward": 0.9485205292701722, "rewards/format_reward": 0.9990451335906982, "rewards/frontier_coverage_0": 0.022891762666404247, "rewards/frontier_coverage_1": 0.022891762666404247, "rewards/frontier_coverage_10": 0.048515988141298295, "rewards/frontier_coverage_15": 0.10812882035970688, "rewards/frontier_coverage_20": 0.18613292574882506, "rewards/frontier_coverage_25": 0.2780951738357544, "rewards/frontier_coverage_5": 0.022937561757862567, "rewards/frontier_entropy_batch_reward": -0.30318026542663573, "signal/accuracy_reward/centered_abs_mean": 0.12851019948720932, "signal/accuracy_reward/group_std_mean": 0.16943923532962799, "signal/accuracy_reward/group_zero_std_frac": 0.5138889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0418495416641236, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06425509974360466, "signal/advantage_abs_mean": 0.7732895016670227, "signal/advantage_pre_scale_abs_mean": 0.08091269582509994, "signal/advantage_pre_scale_std": 0.1275203213095665, "signal/advantage_std": 0.9828638076782227, "signal/brier_reward/centered_abs_mean": 0.11142251789569854, "signal/brier_reward/group_std_mean": 0.14267317950725555, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.180518040060997, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011142251826822758, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016142511367797853, "signal/confidence_uniqueness_reward/group_std_mean": 0.02136380970478058, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026282599568367003, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016142510809004306, "signal/format_reward/centered_abs_mean": 0.0017523871501907706, "signal/format_reward/group_std_mean": 0.00372017240151763, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.013838812150061131, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008761935750953853, "signal/frontier_coverage_0/centered_abs_mean": 0.1558055818080902, "signal/frontier_coverage_0/group_std_mean": 0.2006031185388565, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03602770790457725, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022280198289081456, "signal/frontier_coverage_1/centered_abs_mean": 0.1558055818080902, "signal/frontier_coverage_1/group_std_mean": 0.2006031185388565, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03602770790457725, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022280198289081456, "signal/frontier_coverage_10/centered_abs_mean": 0.06283498480916024, "signal/frontier_coverage_10/group_std_mean": 0.07953204363584518, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014551288262009621, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008985402528196573, "signal/frontier_coverage_15/centered_abs_mean": 0.07994391769170761, "signal/frontier_coverage_15/group_std_mean": 0.09906959235668182, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01861151084303856, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011431980179622768, "signal/frontier_coverage_20/centered_abs_mean": 0.11396068185567856, "signal/frontier_coverage_20/group_std_mean": 0.14176457226276398, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02656862176954746, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016296377405524253, "signal/frontier_coverage_25/centered_abs_mean": 0.1566249281167984, "signal/frontier_coverage_25/group_std_mean": 0.19541522860527039, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03651894517242908, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022397364489734175, "signal/frontier_coverage_5/centered_abs_mean": 0.1556170642375946, "signal/frontier_coverage_5/group_std_mean": 0.20036795735359192, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0359842661768198, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002225324069149792, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3297608971595764, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3948035776615143, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5373982965946198, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03297608904540539, "step": 725 }, { "calibration/aurc": 0.09752518005747307, "calibration/batch_distribution_entropy": 0.9710835255316346, "calibration/buffer_distribution_entropy": 0.9840270711706335, "calibration/confidence_entropy": 0.47364203921770354, "calibration/coverage@0%": 0.11885063098346389, "calibration/coverage@1%": 0.11937146431679721, "calibration/coverage@10%": 0.6858409486510009, "calibration/coverage@15%": 0.7546344647519583, "calibration/coverage@20%": 0.8083156549173195, "calibration/coverage@25%": 0.8802083333333334, "calibration/coverage@30%": 0.9385416666666666, "calibration/coverage@5%": 0.5764101936466492, "calibration/ece": 0.19226834575582027, "calibration/mean_confidence": 0.5581783267311249, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0008680555555555802, "completions/max_length": 3871.0, "completions/max_terminated_length": 3871.0, "completions/mean_length": 1115.5952270507812, "completions/mean_terminated_length": 1116.6011596679687, "completions/min_length": 0.0, "completions/min_terminated_length": 261.4, "epoch": 1.7535905801177485, "grad_norm": 0.0026010002475231886, "learning_rate": 1.8629807692307695e-06, "loss": 0.0023, "num_tokens": 1954102993.0, "reward": 1.011343765258789, "reward_std": 0.09056015610694886, "rewards/accuracy_reward": 0.6978298544883728, "rewards/brier_reward": 0.8352743029594422, "rewards/confidence_uniqueness_reward": 0.9510626435279846, "rewards/format_reward": 0.9991319417953491, "rewards/frontier_coverage_0": 0.04333948716521263, "rewards/frontier_coverage_1": 0.04333948716521263, "rewards/frontier_coverage_10": 0.05298603735864162, "rewards/frontier_coverage_15": 0.10530868023633957, "rewards/frontier_coverage_20": 0.17865284085273742, "rewards/frontier_coverage_25": 0.26539782881736756, "rewards/frontier_coverage_5": 0.0433408307551872, "rewards/frontier_entropy_batch_reward": -0.2624367654323578, "signal/accuracy_reward/centered_abs_mean": 0.10245768278837204, "signal/accuracy_reward/group_std_mean": 0.14452196955680846, "signal/accuracy_reward/group_zero_std_frac": 0.5527777671813965, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8927505373954773, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05122884139418602, "signal/advantage_abs_mean": 0.7562663912773132, "signal/advantage_pre_scale_abs_mean": 0.0673715204000473, "signal/advantage_pre_scale_std": 0.11014018654823303, "signal/advantage_std": 0.9827435135841369, "signal/brier_reward/centered_abs_mean": 0.09993450939655305, "signal/brier_reward/group_std_mean": 0.13122970312833787, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17443340718746186, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009993451088666916, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0147280341014266, "signal/confidence_uniqueness_reward/group_std_mean": 0.0207037802785635, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.025786501169204713, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0014728034380823373, "signal/format_reward/centered_abs_mean": 0.0016710069146938622, "signal/format_reward/group_std_mean": 0.004611522704362869, "signal/format_reward/group_zero_std_frac": 0.9749999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.014536320511251689, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0008355034573469311, "signal/frontier_coverage_0/centered_abs_mean": 0.15731069147586824, "signal/frontier_coverage_0/group_std_mean": 0.20592527985572814, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039265432953834535, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002249542810022831, "signal/frontier_coverage_1/centered_abs_mean": 0.15731069147586824, "signal/frontier_coverage_1/group_std_mean": 0.20592527985572814, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039265432953834535, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002249542810022831, "signal/frontier_coverage_10/centered_abs_mean": 0.06539921313524247, "signal/frontier_coverage_10/group_std_mean": 0.08309726417064667, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016339878924190997, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009352087508887053, "signal/frontier_coverage_15/centered_abs_mean": 0.06776027828454971, "signal/frontier_coverage_15/group_std_mean": 0.08447953909635544, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016957908309996127, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009689719881862402, "signal/frontier_coverage_20/centered_abs_mean": 0.09046011716127396, "signal/frontier_coverage_20/group_std_mean": 0.11392348855733872, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022640842571854593, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012935796519741416, "signal/frontier_coverage_25/centered_abs_mean": 0.12250153869390487, "signal/frontier_coverage_25/group_std_mean": 0.15532831847667694, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.030646225064992906, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017517718952149153, "signal/frontier_coverage_5/centered_abs_mean": 0.15711890459060668, "signal/frontier_coverage_5/group_std_mean": 0.20567532181739806, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03921758532524109, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002246800297871232, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3165683627128601, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3849177360534668, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5541592180728913, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03165683671832085, "step": 730 }, { "calibration/aurc": 0.17294304314154013, "calibration/batch_distribution_entropy": 0.9683228508572282, "calibration/buffer_distribution_entropy": 0.9847746833464687, "calibration/confidence_entropy": 0.4747779032447707, "calibration/coverage@0%": 0.13557033289817233, "calibration/coverage@1%": 0.2148729873803307, "calibration/coverage@10%": 0.35208197345517844, "calibration/coverage@15%": 0.4916666666666667, "calibration/coverage@20%": 0.6567708333333334, "calibration/coverage@25%": 0.765625, "calibration/coverage@30%": 0.8411458333333333, "calibration/coverage@5%": 0.3020098999129678, "calibration/ece": 0.19413389205287204, "calibration/mean_confidence": 0.5501757289490862, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003038194444444464, "completions/max_length": 3543.0, "completions/max_terminated_length": 3543.0, "completions/mean_length": 1157.7715087890624, "completions/mean_terminated_length": 1161.328662109375, "completions/min_length": 0.0, "completions/min_terminated_length": 279.0, "epoch": 1.7655904301196235, "grad_norm": 0.0026367914397269487, "learning_rate": 1.8329326923076924e-06, "loss": 0.0072, "num_tokens": 1970541641.0, "reward": 1.005565345287323, "reward_std": 0.10125984996557236, "rewards/accuracy_reward": 0.6956597208976746, "rewards/brier_reward": 0.8180548667907714, "rewards/confidence_uniqueness_reward": 0.9483519077301026, "rewards/format_reward": 0.996961796283722, "rewards/frontier_coverage_0": 0.029731686878949403, "rewards/frontier_coverage_1": 0.029731686878949403, "rewards/frontier_coverage_10": 0.046731724962592126, "rewards/frontier_coverage_15": 0.0951116681098938, "rewards/frontier_coverage_20": 0.1619558095932007, "rewards/frontier_coverage_25": 0.24104999899864196, "rewards/frontier_coverage_5": 0.029736339347437024, "rewards/frontier_entropy_batch_reward": -0.26453024744987486, "signal/accuracy_reward/centered_abs_mean": 0.12727864384651183, "signal/accuracy_reward/group_std_mean": 0.16613382697105408, "signal/accuracy_reward/group_zero_std_frac": 0.5333333313465118, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0535690546035767, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06363932192325591, "signal/advantage_abs_mean": 0.7730516195297241, "signal/advantage_pre_scale_abs_mean": 0.07883715778589248, "signal/advantage_pre_scale_std": 0.12539079785346985, "signal/advantage_std": 0.9828304886817932, "signal/brier_reward/centered_abs_mean": 0.10875847935676575, "signal/brier_reward/group_std_mean": 0.14072711169719695, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18023832142353058, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010875848308205604, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01687628235667944, "signal/confidence_uniqueness_reward/group_std_mean": 0.023477645963430403, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028091933578252792, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00168762831017375, "signal/format_reward/centered_abs_mean": 0.0035319010145030915, "signal/format_reward/group_std_mean": 0.006885326839983464, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.029567970614880323, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0017659505072515457, "signal/frontier_coverage_0/centered_abs_mean": 0.17091023325920104, "signal/frontier_coverage_0/group_std_mean": 0.22292305529117584, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04045567587018013, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024440162349492313, "signal/frontier_coverage_1/centered_abs_mean": 0.17091023325920104, "signal/frontier_coverage_1/group_std_mean": 0.22292305529117584, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04045567587018013, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024440162349492313, "signal/frontier_coverage_10/centered_abs_mean": 0.06697189211845397, "signal/frontier_coverage_10/group_std_mean": 0.08556520938873291, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015862343646585942, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009576980140991509, "signal/frontier_coverage_15/centered_abs_mean": 0.07027349472045899, "signal/frontier_coverage_15/group_std_mean": 0.08739349991083145, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016706252470612526, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010049110394902527, "signal/frontier_coverage_20/centered_abs_mean": 0.09674167782068252, "signal/frontier_coverage_20/group_std_mean": 0.1208344653248787, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02301716059446335, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013834059704095126, "signal/frontier_coverage_25/centered_abs_mean": 0.13244094848632812, "signal/frontier_coverage_25/group_std_mean": 0.16553622782230376, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03150580003857613, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001893905596807599, "signal/frontier_coverage_5/centered_abs_mean": 0.1706935554742813, "signal/frontier_coverage_5/group_std_mean": 0.22264962494373322, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.040404599905014035, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024409178644418717, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.30670446157455444, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3765598952770233, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5103662192821503, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.030670446157455445, "step": 735 }, { "calibration/aurc": 0.09985071608809112, "calibration/batch_distribution_entropy": 0.9283240535799662, "calibration/buffer_distribution_entropy": 0.9848685619549269, "calibration/confidence_entropy": 0.486386545829507, "calibration/coverage@0%": 0.043229166666666666, "calibration/coverage@1%": 0.09687499999999999, "calibration/coverage@10%": 0.6302083333333334, "calibration/coverage@15%": 0.7114583333333333, "calibration/coverage@20%": 0.9317708333333334, "calibration/coverage@25%": 0.9541666666666668, "calibration/coverage@30%": 0.9651041666666668, "calibration/coverage@5%": 0.5005208333333334, "calibration/ece": 0.18411820729166667, "calibration/mean_confidence": 0.611181165625, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00234375, "completions/max_length": 3516.0, "completions/max_terminated_length": 3516.0, "completions/mean_length": 1135.1943115234376, "completions/mean_terminated_length": 1137.8385498046875, "completions/min_length": 0.0, "completions/min_terminated_length": 298.8, "epoch": 1.7775902801214984, "grad_norm": 0.0029641035944223404, "learning_rate": 1.8028846153846156e-06, "loss": 0.0037, "num_tokens": 1986678471.0, "reward": 1.0208436727523804, "reward_std": 0.09842554479837418, "rewards/accuracy_reward": 0.7300347208976745, "rewards/brier_reward": 0.8294078826904296, "rewards/confidence_uniqueness_reward": 0.9471985816955566, "rewards/format_reward": 0.99765625, "rewards/frontier_coverage_0": 0.015275874444341753, "rewards/frontier_coverage_1": 0.015275874444341753, "rewards/frontier_coverage_10": 0.048988838493824, "rewards/frontier_coverage_15": 0.10942500680685044, "rewards/frontier_coverage_20": 0.1874927282333374, "rewards/frontier_coverage_25": 0.27740028500556946, "rewards/frontier_coverage_5": 0.015306396328378468, "rewards/frontier_entropy_batch_reward": -0.3023156225681305, "signal/accuracy_reward/centered_abs_mean": 0.11213107407093048, "signal/accuracy_reward/group_std_mean": 0.14724079966545106, "signal/accuracy_reward/group_zero_std_frac": 0.5861111044883728, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9607778906822204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05606553703546524, "signal/advantage_abs_mean": 0.7797276496887207, "signal/advantage_pre_scale_abs_mean": 0.07560006380081177, "signal/advantage_pre_scale_std": 0.12288236767053604, "signal/advantage_std": 0.9827636361122132, "signal/brier_reward/centered_abs_mean": 0.10327324271202087, "signal/brier_reward/group_std_mean": 0.13345020413398742, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17794868648052214, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010327324084937573, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018071673437952997, "signal/confidence_uniqueness_reward/group_std_mean": 0.026784731075167656, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03143479339778423, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018071672879159451, "signal/format_reward/centered_abs_mean": 0.004345703113358468, "signal/format_reward/group_std_mean": 0.009928835928440094, "signal/format_reward/group_zero_std_frac": 0.9527777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03843059604987502, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002172851556679234, "signal/frontier_coverage_0/centered_abs_mean": 0.14959966242313386, "signal/frontier_coverage_0/group_std_mean": 0.1921100914478302, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03681350834667683, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021392751950770617, "signal/frontier_coverage_1/centered_abs_mean": 0.14959966242313386, "signal/frontier_coverage_1/group_std_mean": 0.1921100914478302, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03681350834667683, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021392751950770617, "signal/frontier_coverage_10/centered_abs_mean": 0.06143470034003258, "signal/frontier_coverage_10/group_std_mean": 0.07756249755620956, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015148719027638435, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008785162470303475, "signal/frontier_coverage_15/centered_abs_mean": 0.07495491802692414, "signal/frontier_coverage_15/group_std_mean": 0.09279847294092178, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018493932485580445, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010718553327023982, "signal/frontier_coverage_20/centered_abs_mean": 0.1055668607354164, "signal/frontier_coverage_20/group_std_mean": 0.13145640641450881, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0260279543697834, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015096060931682586, "signal/frontier_coverage_25/centered_abs_mean": 0.14254448264837266, "signal/frontier_coverage_25/group_std_mean": 0.17810506224632264, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0351248387247324, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020383860683068633, "signal/frontier_coverage_5/centered_abs_mean": 0.1494191914796829, "signal/frontier_coverage_5/group_std_mean": 0.19188562035560608, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03676880933344364, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002136694313958287, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33148173689842225, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40152330994606017, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5731380939483642, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033148175477981566, "step": 740 }, { "calibration/aurc": 0.13612695574220837, "calibration/batch_distribution_entropy": 0.9295601144978141, "calibration/buffer_distribution_entropy": 0.9845439520338692, "calibration/confidence_entropy": 0.4805375002432246, "calibration/coverage@0%": 0.05367711053089643, "calibration/coverage@1%": 0.10628127719756311, "calibration/coverage@10%": 0.3990059290687554, "calibration/coverage@15%": 0.6105648933855526, "calibration/coverage@20%": 0.8020207789382072, "calibration/coverage@25%": 0.8947916666666667, "calibration/coverage@30%": 0.9427083333333333, "calibration/coverage@5%": 0.27295066362053955, "calibration/ece": 0.1459114410356832, "calibration/mean_confidence": 0.5916419980417755, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001128472222222232, "completions/max_length": 3589.0, "completions/max_terminated_length": 3589.0, "completions/mean_length": 1115.8822509765625, "completions/mean_terminated_length": 1117.1528076171876, "completions/min_length": 0.0, "completions/min_terminated_length": 297.4, "epoch": 1.7895901301233734, "grad_norm": 0.002868454437702894, "learning_rate": 1.7728365384615387e-06, "loss": 0.0094, "num_tokens": 2002651930.0, "reward": 1.0109495759010314, "reward_std": 0.10073102861642838, "rewards/accuracy_reward": 0.706249988079071, "rewards/brier_reward": 0.8292598009109498, "rewards/confidence_uniqueness_reward": 0.9492504000663757, "rewards/format_reward": 0.9988715171813964, "rewards/frontier_coverage_0": 0.02917664125561714, "rewards/frontier_coverage_1": 0.02917664125561714, "rewards/frontier_coverage_10": 0.04859147928655148, "rewards/frontier_coverage_15": 0.1054500088095665, "rewards/frontier_coverage_20": 0.17938823401927947, "rewards/frontier_coverage_25": 0.26465229988098143, "rewards/frontier_coverage_5": 0.02918265573680401, "rewards/frontier_entropy_batch_reward": -0.2926656484603882, "signal/accuracy_reward/centered_abs_mean": 0.12176649272441864, "signal/accuracy_reward/group_std_mean": 0.16606390178203584, "signal/accuracy_reward/group_zero_std_frac": 0.5055555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9774610042572022, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06088324636220932, "signal/advantage_abs_mean": 0.7588282704353333, "signal/advantage_pre_scale_abs_mean": 0.07568231076002122, "signal/advantage_pre_scale_std": 0.12143019586801529, "signal/advantage_std": 0.9828806400299073, "signal/brier_reward/centered_abs_mean": 0.10687040835618973, "signal/brier_reward/group_std_mean": 0.1374327689409256, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17205712497234343, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01068704053759575, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015384367294609546, "signal/confidence_uniqueness_reward/group_std_mean": 0.02148051857948303, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0247656911611557, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015384367434307933, "signal/format_reward/centered_abs_mean": 0.0021104600746184587, "signal/format_reward/group_std_mean": 0.005053839646279812, "signal/format_reward/group_zero_std_frac": 0.9749999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.016888655349612237, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0010552300373092294, "signal/frontier_coverage_0/centered_abs_mean": 0.1621706336736679, "signal/frontier_coverage_0/group_std_mean": 0.2084288328886032, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03729799836874008, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023190399631857874, "signal/frontier_coverage_1/centered_abs_mean": 0.1621706336736679, "signal/frontier_coverage_1/group_std_mean": 0.2084288328886032, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03729799836874008, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023190399631857874, "signal/frontier_coverage_10/centered_abs_mean": 0.06481548249721528, "signal/frontier_coverage_10/group_std_mean": 0.08101860135793686, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014912334084510804, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009268613997846842, "signal/frontier_coverage_15/centered_abs_mean": 0.07552316784858704, "signal/frontier_coverage_15/group_std_mean": 0.09370106011629105, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017398131638765336, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010799813084304334, "signal/frontier_coverage_20/centered_abs_mean": 0.10281162559986115, "signal/frontier_coverage_20/group_std_mean": 0.1287536635994911, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023685456439852714, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014702062588185072, "signal/frontier_coverage_25/centered_abs_mean": 0.1379389226436615, "signal/frontier_coverage_25/group_std_mean": 0.17409807741641997, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03176463283598423, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019725265679880976, "signal/frontier_coverage_5/centered_abs_mean": 0.16200172007083893, "signal/frontier_coverage_5/group_std_mean": 0.20821319222450257, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03725910410284996, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023166246246546507, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32640965580940245, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39191449284553526, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5255501866340637, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03264096602797508, "step": 745 }, { "calibration/aurc": 0.10590190080432076, "calibration/batch_distribution_entropy": 0.9592523462908378, "calibration/buffer_distribution_entropy": 0.9840037174285836, "calibration/confidence_entropy": 0.5020861036359838, "calibration/coverage@0%": 0.17800669060052218, "calibration/coverage@1%": 0.25043108137510883, "calibration/coverage@10%": 0.607252230200174, "calibration/coverage@15%": 0.71875, "calibration/coverage@20%": 0.8104166666666666, "calibration/coverage@25%": 0.8848958333333332, "calibration/coverage@30%": 0.9536458333333334, "calibration/coverage@5%": 0.34107783942558745, "calibration/ece": 0.1767842219253155, "calibration/mean_confidence": 0.5643607527809509, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001475694444444442, "completions/max_length": 3785.4, "completions/max_terminated_length": 3785.4, "completions/mean_length": 1129.112158203125, "completions/mean_terminated_length": 1130.78193359375, "completions/min_length": 0.0, "completions/min_terminated_length": 321.2, "epoch": 1.8015899801252484, "grad_norm": 0.0028191518504172564, "learning_rate": 1.7427884615384616e-06, "loss": 0.0067, "num_tokens": 2018771430.0, "reward": 1.01758474111557, "reward_std": 0.10301121026277542, "rewards/accuracy_reward": 0.7212673544883728, "rewards/brier_reward": 0.8407497763633728, "rewards/confidence_uniqueness_reward": 0.9472701191902161, "rewards/format_reward": 0.998524296283722, "rewards/frontier_coverage_0": 0.03290572431869805, "rewards/frontier_coverage_1": 0.03290572431869805, "rewards/frontier_coverage_10": 0.053498401492834094, "rewards/frontier_coverage_15": 0.1104421705007553, "rewards/frontier_coverage_20": 0.1861650675535202, "rewards/frontier_coverage_25": 0.27491688430309297, "rewards/frontier_coverage_5": 0.032948700070846826, "rewards/frontier_entropy_batch_reward": -0.31463190019130705, "signal/accuracy_reward/centered_abs_mean": 0.118994140625, "signal/accuracy_reward/group_std_mean": 0.16333966851234435, "signal/accuracy_reward/group_zero_std_frac": 0.5166666746139527, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9732692003250122, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0594970703125, "signal/advantage_abs_mean": 0.7545445919036865, "signal/advantage_pre_scale_abs_mean": 0.07700014412403107, "signal/advantage_pre_scale_std": 0.1261327385902405, "signal/advantage_std": 0.9828519821166992, "signal/brier_reward/centered_abs_mean": 0.10183228701353073, "signal/brier_reward/group_std_mean": 0.13478365838527678, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16659377813339232, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010183229111135006, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016775419190526008, "signal/confidence_uniqueness_reward/group_std_mean": 0.022979332879185677, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02745484858751297, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016775419702753426, "signal/format_reward/centered_abs_mean": 0.00267469622194767, "signal/format_reward/group_std_mean": 0.005587521148845554, "signal/format_reward/group_zero_std_frac": 0.9749999880790711, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02148791467770934, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001337348110973835, "signal/frontier_coverage_0/centered_abs_mean": 0.13947168439626695, "signal/frontier_coverage_0/group_std_mean": 0.18601751327514648, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.032587919384241104, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019944450818002225, "signal/frontier_coverage_1/centered_abs_mean": 0.13947168439626695, "signal/frontier_coverage_1/group_std_mean": 0.18601751327514648, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.032587919384241104, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019944450818002225, "signal/frontier_coverage_10/centered_abs_mean": 0.05676937475800514, "signal/frontier_coverage_10/group_std_mean": 0.07297334596514701, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013288442231714725, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008118020137771964, "signal/frontier_coverage_15/centered_abs_mean": 0.07833496183156967, "signal/frontier_coverage_15/group_std_mean": 0.09797212928533554, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01837916225194931, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001120189926587045, "signal/frontier_coverage_20/centered_abs_mean": 0.11107763350009918, "signal/frontier_coverage_20/group_std_mean": 0.13975699096918107, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02607056647539139, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001588410115800798, "signal/frontier_coverage_25/centered_abs_mean": 0.1506495952606201, "signal/frontier_coverage_25/group_std_mean": 0.19028232991695404, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0353517659008503, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002154289116151631, "signal/frontier_coverage_5/centered_abs_mean": 0.13931359052658082, "signal/frontier_coverage_5/group_std_mean": 0.18581429719924927, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0325510174036026, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00199218422640115, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32679831981658936, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3907285392284393, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5357029259204864, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032679833471775055, "step": 750 }, { "epoch": 1.8015899801252484, "eval_calibration/aurc": 0.14554261102872565, "eval_calibration/batch_distribution_entropy": 0.91718440536761, "eval_calibration/buffer_distribution_entropy": 0.9837377091437123, "eval_calibration/confidence_entropy": 0.48853757066041226, "eval_calibration/coverage@0%": 0.18229166666666666, "eval_calibration/coverage@1%": 0.18229166666666666, "eval_calibration/coverage@10%": 0.5208333333333334, "eval_calibration/coverage@15%": 0.671875, "eval_calibration/coverage@20%": 0.7708333333333334, "eval_calibration/coverage@25%": 0.8802083333333334, "eval_calibration/coverage@30%": 0.921875, "eval_calibration/coverage@5%": 0.2552083333333333, "eval_calibration/ece": 0.21193911458333328, "eval_calibration/mean_confidence": 0.5900025520833334, "eval_completions/clipped_ratio": 0.0008680555555555617, "eval_completions/max_length": 2721.3333333333335, "eval_completions/max_terminated_length": 2721.3333333333335, "eval_completions/mean_length": 1122.1944580078125, "eval_completions/mean_terminated_length": 1123.159200032552, "eval_completions/min_length": 336.8333333333333, "eval_completions/min_terminated_length": 404.5, "eval_loss": 0.0, "eval_num_tokens": 2018771430.0, "eval_reward": 0.9297488828500112, "eval_reward_std": 0.23093928893407187, "eval_rewards/accuracy_reward": 0.6935763955116272, "eval_rewards/brier_reward": 0.8346609771251678, "eval_rewards/confidence_uniqueness_reward": 0.8985180159409841, "eval_rewards/format_reward": 0.9991319477558136, "eval_rewards/frontier_coverage_0": 0.03844601707533002, "eval_rewards/frontier_coverage_1": 0.03844601707533002, "eval_rewards/frontier_coverage_10": 0.04998234659433365, "eval_rewards/frontier_coverage_15": 0.10391578078269958, "eval_rewards/frontier_coverage_20": 0.17418100436528525, "eval_rewards/frontier_coverage_25": 0.25515559564034146, "eval_rewards/frontier_coverage_5": 0.03847376614188155, "eval_rewards/frontier_entropy_batch_reward": -0.9991319477558136, "eval_runtime": 158.3973, "eval_samples_per_second": 6.313, "eval_signal/accuracy_reward/centered_abs_mean": 0.4097764740387599, "eval_signal/accuracy_reward/group_std_mean": 0.45795584718386334, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8910409013430277, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20488823701937994, "eval_signal/advantage_abs_mean": 0.87361212571462, "eval_signal/advantage_pre_scale_abs_mean": 0.202990693350633, "eval_signal/advantage_pre_scale_std": 0.2284200762708982, "eval_signal/advantage_std": 0.9863944252332052, "eval_signal/brier_reward/centered_abs_mean": 0.1545459379752477, "eval_signal/brier_reward/group_std_mean": 0.2079830765724182, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06722560152411461, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.015454593890657028, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04118582233786583, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05077329402168592, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.017912627197802067, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004118582389007012, "eval_signal/format_reward/centered_abs_mean": 0.0016818575871487458, "eval_signal/format_reward/group_std_mean": 0.0049104637776811915, "eval_signal/format_reward/group_zero_std_frac": 0.9722222288449606, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0035001467913389206, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0008409287935743729, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2608467886845271, "eval_signal/frontier_coverage_0/group_std_mean": 0.36050594846407574, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016234679458041985, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0037301090002680817, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2608467886845271, "eval_signal/frontier_coverage_1/group_std_mean": 0.36050594846407574, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016234679458041985, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0037301090002680817, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.07517050827542941, "eval_signal/frontier_coverage_10/group_std_mean": 0.10018332054217656, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004679729075481494, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010749382199719548, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.11889106159408887, "eval_signal/frontier_coverage_15/group_std_mean": 0.1488691916068395, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007406616040195028, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001700142165645957, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.20220743864774704, "eval_signal/frontier_coverage_20/group_std_mean": 0.2463468238711357, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.01259286655113101, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002891566293934981, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.29517705241839093, "eval_signal/frontier_coverage_25/group_std_mean": 0.35718540847301483, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018373853837450344, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004221031907945871, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2604764675100644, "eval_signal/frontier_coverage_5/group_std_mean": 0.3600422491629918, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016211653128266335, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037248135389139256, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0016818575871487458, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.0049104637776811915, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9722222288449606, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007000294669220845, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0001681857587148746, "eval_steps_per_second": 0.038, "step": 750 }, { "epoch": 1.8015899801252484, "step": 750, "train_probe_calibration/aurc": 0.10360412267687767, "train_probe_calibration/batch_distribution_entropy": 0.9012179728421429, "train_probe_calibration/buffer_distribution_entropy": 0.9835511144626669, "train_probe_calibration/confidence_entropy": 0.4963477415046183, "train_probe_calibration/coverage@0%": 0.3125, "train_probe_calibration/coverage@1%": 0.3125, "train_probe_calibration/coverage@10%": 0.640625, "train_probe_calibration/coverage@15%": 0.734375, "train_probe_calibration/coverage@20%": 0.8541666666666666, "train_probe_calibration/coverage@25%": 0.9635416666666666, "train_probe_calibration/coverage@30%": 0.9895833333333334, "train_probe_calibration/coverage@5%": 0.375, "train_probe_calibration/ece": 0.2142140625, "train_probe_calibration/mean_confidence": 0.5926161458333333, "train_probe_completions/clipped_ratio": 0.0008680555555555617, "train_probe_completions/max_length": 3456.1666666666665, "train_probe_completions/max_terminated_length": 3456.1666666666665, "train_probe_completions/mean_length": 1130.0889282226562, "train_probe_completions/mean_terminated_length": 1131.0578002929688, "train_probe_completions/min_length": 281.3333333333333, "train_probe_completions/min_terminated_length": 343.1666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 2018771430.0, "train_probe_reward": 0.95806951324145, "train_probe_reward_std": 0.21482898046573004, "train_probe_rewards/accuracy_reward": 0.7499999900658926, "train_probe_rewards/brier_reward": 0.8428874909877777, "train_probe_rewards/confidence_uniqueness_reward": 0.8931871751944224, "train_probe_rewards/format_reward": 0.9991319477558136, "train_probe_rewards/frontier_coverage_0": 0.012033387494739145, "train_probe_rewards/frontier_coverage_1": 0.012033387494739145, "train_probe_rewards/frontier_coverage_10": 0.0505746491253376, "train_probe_rewards/frontier_coverage_15": 0.11439343293507893, "train_probe_rewards/frontier_coverage_20": 0.19532609979311624, "train_probe_rewards/frontier_coverage_25": 0.28949019064505893, "train_probe_rewards/frontier_coverage_5": 0.012110456203420958, "train_probe_rewards/frontier_entropy_batch_reward": -0.9991319477558136, "train_probe_runtime": 188.5693, "train_probe_samples_per_second": 5.303, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3663194427887599, "train_probe_signal/accuracy_reward/group_std_mean": 0.43311170240243274, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8608072102069855, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18315972139437994, "train_probe_signal/advantage_abs_mean": 0.8269814153512319, "train_probe_signal/advantage_pre_scale_abs_mean": 0.17903072386980057, "train_probe_signal/advantage_pre_scale_std": 0.2130366489291191, "train_probe_signal/advantage_std": 0.9863598346710205, "train_probe_signal/brier_reward/centered_abs_mean": 0.14471079657475153, "train_probe_signal/brier_reward/group_std_mean": 0.19478769848744074, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06802343266705672, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.014471080464621386, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04179748644431432, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.05130287570257982, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.019671458440522354, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004179748706519604, "train_probe_signal/format_reward/centered_abs_mean": 0.0016818575871487458, "train_probe_signal/format_reward/group_std_mean": 0.0049104637776811915, "train_probe_signal/format_reward/group_zero_std_frac": 0.9722222288449606, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.0038858645906051, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0008409287935743729, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.2487302447358767, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.3555862208207448, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01672346827884515, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0035568424112473926, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.2487302447358767, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.3555862208207448, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01672346827884515, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0035568424112473926, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07384044552842776, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.09972128023703893, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004964815763135751, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001055918352600808, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11256096636255582, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.13923830290635428, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007571271853521466, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016096217441372573, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.18888175984223685, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.22895304610331854, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012701889500021935, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002701009080434839, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2721845557292302, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.32942800720532733, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0183031614869833, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0038922393772130213, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.24837070206801096, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3551288843154907, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016699314738313358, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0035517010061691203, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0016818575871487458, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.0049104637776811915, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9722222288449606, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0007771729336430629, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0001681857587148746, "train_probe_steps_per_second": 0.032 }, { "calibration/aurc": 0.16302321103458023, "calibration/batch_distribution_entropy": 0.9463972655806352, "calibration/buffer_distribution_entropy": 0.9836929635418779, "calibration/confidence_entropy": 0.5061289924018656, "calibration/coverage@0%": 0.055265748031496066, "calibration/coverage@1%": 0.055265748031496066, "calibration/coverage@10%": 0.33877132545931754, "calibration/coverage@15%": 0.5459153543307086, "calibration/coverage@20%": 0.7237942913385826, "calibration/coverage@25%": 0.8281044947506562, "calibration/coverage@30%": 0.9072916666666666, "calibration/coverage@5%": 0.14729740813648293, "calibration/ece": 0.1197935157480315, "calibration/mean_confidence": 0.5964368398950131, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.000694444444444442, "completions/max_length": 3647.2, "completions/max_terminated_length": 3647.2, "completions/mean_length": 1137.7447021484375, "completions/mean_terminated_length": 1138.55546875, "completions/min_length": 140.4, "completions/min_terminated_length": 322.2, "epoch": 1.8135898301271234, "grad_norm": 0.0029852569568902254, "learning_rate": 1.7127403846153848e-06, "loss": 0.0088, "num_tokens": 2034969545.0, "reward": 1.0164843440055846, "reward_std": 0.09799668043851853, "rewards/accuracy_reward": 0.7115451455116272, "rewards/brier_reward": 0.8474408864974976, "rewards/confidence_uniqueness_reward": 0.9490378737449646, "rewards/format_reward": 0.9993055462837219, "rewards/frontier_coverage_0": 0.0392310387454927, "rewards/frontier_coverage_1": 0.0392310387454927, "rewards/frontier_coverage_10": 0.05753873959183693, "rewards/frontier_coverage_15": 0.11426883339881896, "rewards/frontier_coverage_20": 0.1901752233505249, "rewards/frontier_coverage_25": 0.2787013977766037, "rewards/frontier_coverage_5": 0.03928067879751325, "rewards/frontier_entropy_batch_reward": -0.2943439185619354, "signal/accuracy_reward/centered_abs_mean": 0.11543511301279068, "signal/accuracy_reward/group_std_mean": 0.15154503285884857, "signal/accuracy_reward/group_zero_std_frac": 0.5750000178813934, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9787806272506714, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05771755650639534, "signal/advantage_abs_mean": 0.7752339601516723, "signal/advantage_pre_scale_abs_mean": 0.0760658174753189, "signal/advantage_pre_scale_std": 0.12216138690710068, "signal/advantage_std": 0.9827526807785034, "signal/brier_reward/centered_abs_mean": 0.0946065753698349, "signal/brier_reward/group_std_mean": 0.12359896749258041, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1623237133026123, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009460657835006714, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01507211085408926, "signal/confidence_uniqueness_reward/group_std_mean": 0.020275114849209786, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026477007195353508, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00150721101090312, "signal/format_reward/centered_abs_mean": 0.0013129340135492385, "signal/format_reward/group_std_mean": 0.0032778555527329446, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.012105725053697825, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0006564670067746193, "signal/frontier_coverage_0/centered_abs_mean": 0.14191508293151855, "signal/frontier_coverage_0/group_std_mean": 0.18624544739723206, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03474088981747627, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020293857669457793, "signal/frontier_coverage_1/centered_abs_mean": 0.14191508293151855, "signal/frontier_coverage_1/group_std_mean": 0.18624544739723206, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03474088981747627, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020293857669457793, "signal/frontier_coverage_10/centered_abs_mean": 0.05707173347473145, "signal/frontier_coverage_10/group_std_mean": 0.07210961431264877, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01410923469811678, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000816125818528235, "signal/frontier_coverage_15/centered_abs_mean": 0.07441399842500687, "signal/frontier_coverage_15/group_std_mean": 0.09240110963582993, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018513403832912445, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010641201632097363, "signal/frontier_coverage_20/centered_abs_mean": 0.10533722341060639, "signal/frontier_coverage_20/group_std_mean": 0.13098296225070954, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026201526075601576, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001506322273053229, "signal/frontier_coverage_25/centered_abs_mean": 0.143466717004776, "signal/frontier_coverage_25/group_std_mean": 0.17837926149368286, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03564382195472717, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00205157408490777, "signal/frontier_coverage_5/centered_abs_mean": 0.1416635975241661, "signal/frontier_coverage_5/group_std_mean": 0.1859228640794754, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03467725887894631, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020257893018424513, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32274038195610044, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3907089829444885, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.561868679523468, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032274038344621656, "step": 755 }, { "calibration/aurc": 0.1406128362914006, "calibration/batch_distribution_entropy": 0.9271639045183393, "calibration/buffer_distribution_entropy": 0.9835649008372214, "calibration/confidence_entropy": 0.49665028010211987, "calibration/coverage@0%": 0.08821393603133158, "calibration/coverage@1%": 0.14774396214099217, "calibration/coverage@10%": 0.390082408616188, "calibration/coverage@15%": 0.5203532963446474, "calibration/coverage@20%": 0.782351501305483, "calibration/coverage@25%": 0.906036499129678, "calibration/coverage@30%": 0.9712793733681462, "calibration/coverage@5%": 0.22753073324630116, "calibration/ece": 0.14779586510008708, "calibration/mean_confidence": 0.6198723345844213, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001128472222222232, "completions/max_length": 3703.0, "completions/max_terminated_length": 3703.0, "completions/mean_length": 1151.5933349609375, "completions/mean_terminated_length": 1152.89443359375, "completions/min_length": 0.0, "completions/min_terminated_length": 280.6, "epoch": 1.8255896801289984, "grad_norm": 0.0028999089263379574, "learning_rate": 1.682692307692308e-06, "loss": -0.0036, "num_tokens": 2051306492.0, "reward": 1.0116494297981262, "reward_std": 0.0956185519695282, "rewards/accuracy_reward": 0.7160590291023254, "rewards/brier_reward": 0.8324668526649475, "rewards/confidence_uniqueness_reward": 0.9463574290275574, "rewards/format_reward": 0.9987847089767456, "rewards/frontier_coverage_0": 0.02435053661465645, "rewards/frontier_coverage_1": 0.02435053661465645, "rewards/frontier_coverage_10": 0.054039137065410615, "rewards/frontier_coverage_15": 0.11133374571800232, "rewards/frontier_coverage_20": 0.18582258224487305, "rewards/frontier_coverage_25": 0.2720319747924805, "rewards/frontier_coverage_5": 0.02443299610167742, "rewards/frontier_entropy_batch_reward": -0.336128431558609, "signal/accuracy_reward/centered_abs_mean": 0.113134765625, "signal/accuracy_reward/group_std_mean": 0.1469035863876343, "signal/accuracy_reward/group_zero_std_frac": 0.5833333492279053, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9879292249679565, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0565673828125, "signal/advantage_abs_mean": 0.7799701571464539, "signal/advantage_pre_scale_abs_mean": 0.07455487251281738, "signal/advantage_pre_scale_std": 0.11972530782222748, "signal/advantage_std": 0.9827275753021241, "signal/brier_reward/centered_abs_mean": 0.10523280948400497, "signal/brier_reward/group_std_mean": 0.13444634526968002, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18512049913406373, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010523280873894692, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017059031501412393, "signal/confidence_uniqueness_reward/group_std_mean": 0.023790639638900758, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030060911551117897, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017059032339602708, "signal/format_reward/centered_abs_mean": 0.002278645837213844, "signal/format_reward/group_std_mean": 0.005544885993003845, "signal/format_reward/group_zero_std_frac": 0.9722222089767456, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.019949254114180803, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001139322918606922, "signal/frontier_coverage_0/centered_abs_mean": 0.14627977907657624, "signal/frontier_coverage_0/group_std_mean": 0.185261470079422, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036748398840427396, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020918007707223297, "signal/frontier_coverage_1/centered_abs_mean": 0.14627977907657624, "signal/frontier_coverage_1/group_std_mean": 0.185261470079422, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036748398840427396, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020918007707223297, "signal/frontier_coverage_10/centered_abs_mean": 0.0604740172624588, "signal/frontier_coverage_10/group_std_mean": 0.0752995565533638, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015242612175643443, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008647784125059843, "signal/frontier_coverage_15/centered_abs_mean": 0.08006558865308762, "signal/frontier_coverage_15/group_std_mean": 0.098976169526577, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020191213116049767, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011449379147961737, "signal/frontier_coverage_20/centered_abs_mean": 0.11131712347269059, "signal/frontier_coverage_20/group_std_mean": 0.13847638368606568, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028045033290982246, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015918348217383027, "signal/frontier_coverage_25/centered_abs_mean": 0.14897879362106323, "signal/frontier_coverage_25/group_std_mean": 0.18610316812992095, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03750268965959549, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002130396710708737, "signal/frontier_coverage_5/centered_abs_mean": 0.14582152664661407, "signal/frontier_coverage_5/group_std_mean": 0.18467966616153716, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036631081253290176, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00208524779882282, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3315341889858246, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3946572482585907, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5843674898147583, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03315341994166374, "step": 760 }, { "calibration/aurc": 0.10008487753171016, "calibration/batch_distribution_entropy": 0.9277099916065437, "calibration/buffer_distribution_entropy": 0.9826846621379814, "calibration/confidence_entropy": 0.490593213002777, "calibration/coverage@0%": 0.2011480148342059, "calibration/coverage@1%": 0.2521896815008726, "calibration/coverage@10%": 0.45394033595113437, "calibration/coverage@15%": 0.7749072862129144, "calibration/coverage@20%": 0.8547420375218149, "calibration/coverage@25%": 0.975, "calibration/coverage@30%": 0.99375, "calibration/coverage@5%": 0.3809200479930192, "calibration/ece": 0.19351178340423209, "calibration/mean_confidence": 0.6179748778086823, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0012152777777777902, "completions/max_length": 3756.2, "completions/max_terminated_length": 3756.2, "completions/mean_length": 1263.1088623046876, "completions/mean_terminated_length": 1264.64208984375, "completions/min_length": 0.0, "completions/min_terminated_length": 371.4, "epoch": 1.8375895301308733, "grad_norm": 0.0026731251273304224, "learning_rate": 1.6526442307692309e-06, "loss": 0.0046, "num_tokens": 2068969922.0, "reward": 1.014076590538025, "reward_std": 0.09386872947216034, "rewards/accuracy_reward": 0.7167534589767456, "rewards/brier_reward": 0.8214235782623291, "rewards/confidence_uniqueness_reward": 0.9496928334236145, "rewards/format_reward": 0.9987847208976746, "rewards/frontier_coverage_0": 0.015309961698949336, "rewards/frontier_coverage_1": 0.015309961698949336, "rewards/frontier_coverage_10": 0.0462947279214859, "rewards/frontier_coverage_15": 0.10164597630500793, "rewards/frontier_coverage_20": 0.17245526611804962, "rewards/frontier_coverage_25": 0.2544937252998352, "rewards/frontier_coverage_5": 0.015843074396252634, "rewards/frontier_entropy_batch_reward": -0.29689528942108157, "signal/accuracy_reward/centered_abs_mean": 0.10371636152267456, "signal/accuracy_reward/group_std_mean": 0.14323081374168395, "signal/accuracy_reward/group_zero_std_frac": 0.5666666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8753082990646363, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05185818076133728, "signal/advantage_abs_mean": 0.764187490940094, "signal/advantage_pre_scale_abs_mean": 0.07085417807102204, "signal/advantage_pre_scale_std": 0.11614946275949478, "signal/advantage_std": 0.9827946066856384, "signal/brier_reward/centered_abs_mean": 0.10038460642099381, "signal/brier_reward/group_std_mean": 0.1296519249677658, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16992796957492828, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010038460791110992, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.015540579706430436, "signal/confidence_uniqueness_reward/group_std_mean": 0.021095557510852812, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026413920521736144, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0015540578635409475, "signal/format_reward/centered_abs_mean": 0.002202690974809229, "signal/format_reward/group_std_mean": 0.004494689032435417, "signal/format_reward/group_zero_std_frac": 0.9805555462837219, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.018810847867280246, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0011013454874046146, "signal/frontier_coverage_0/centered_abs_mean": 0.14608448445796968, "signal/frontier_coverage_0/group_std_mean": 0.1897787034511566, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03529713377356529, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002089008130133152, "signal/frontier_coverage_1/centered_abs_mean": 0.14608448445796968, "signal/frontier_coverage_1/group_std_mean": 0.1897787034511566, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03529713377356529, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002089008130133152, "signal/frontier_coverage_10/centered_abs_mean": 0.05769690573215484, "signal/frontier_coverage_10/group_std_mean": 0.07240410298109054, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013960456103086471, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000825065781828016, "signal/frontier_coverage_15/centered_abs_mean": 0.07257926762104035, "signal/frontier_coverage_15/group_std_mean": 0.08965266942977905, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017614268139004706, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010378835606388748, "signal/frontier_coverage_20/centered_abs_mean": 0.09984047561883927, "signal/frontier_coverage_20/group_std_mean": 0.12426990419626235, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024254824593663217, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014277187641710044, "signal/frontier_coverage_25/centered_abs_mean": 0.13424190729856492, "signal/frontier_coverage_25/group_std_mean": 0.16846884191036224, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03261243365705013, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019196592504158616, "signal/frontier_coverage_5/centered_abs_mean": 0.1453851044178009, "signal/frontier_coverage_5/group_std_mean": 0.1888649046421051, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035127484053373334, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020790069829672575, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33021358251571653, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3971860229969025, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5606383442878723, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033021359145641326, "step": 765 }, { "calibration/aurc": 0.09759682681677295, "calibration/batch_distribution_entropy": 0.9441530418407963, "calibration/buffer_distribution_entropy": 0.982589112635219, "calibration/confidence_entropy": 0.4846604716444439, "calibration/coverage@0%": 0.14087299178769266, "calibration/coverage@1%": 0.23946959753181796, "calibration/coverage@10%": 0.5813764628333183, "calibration/coverage@15%": 0.7180358140950996, "calibration/coverage@20%": 0.8662592569437845, "calibration/coverage@25%": 0.9567708333333332, "calibration/coverage@30%": 0.9848958333333332, "calibration/coverage@5%": 0.42531747960872474, "calibration/ece": 0.16580303715657335, "calibration/mean_confidence": 0.6179463742795799, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008420138888888906, "completions/max_length": 3919.4, "completions/max_terminated_length": 3919.4, "completions/mean_length": 1410.801220703125, "completions/mean_terminated_length": 1422.8932373046875, "completions/min_length": 0.0, "completions/min_terminated_length": 377.6, "epoch": 1.8495893801327483, "grad_norm": 0.002413135953247547, "learning_rate": 1.622596153846154e-06, "loss": -0.0172, "num_tokens": 2088303696.0, "reward": 1.0102350831031799, "reward_std": 0.10468598753213883, "rewards/accuracy_reward": 0.7231770753860474, "rewards/brier_reward": 0.820259952545166, "rewards/confidence_uniqueness_reward": 0.9406996846199036, "rewards/format_reward": 0.9915798664093017, "rewards/frontier_coverage_0": 0.01387081373250112, "rewards/frontier_coverage_1": 0.01387081373250112, "rewards/frontier_coverage_10": 0.049073401093482974, "rewards/frontier_coverage_15": 0.10792291015386582, "rewards/frontier_coverage_20": 0.1824491500854492, "rewards/frontier_coverage_25": 0.2690129905939102, "rewards/frontier_coverage_5": 0.014235794026171788, "rewards/frontier_entropy_batch_reward": -0.3254063129425049, "signal/accuracy_reward/centered_abs_mean": 0.11168077290058136, "signal/accuracy_reward/group_std_mean": 0.1492277979850769, "signal/accuracy_reward/group_zero_std_frac": 0.5638889074325562, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9240718126296997, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05584038645029068, "signal/advantage_abs_mean": 0.7747442603111268, "signal/advantage_pre_scale_abs_mean": 0.08016434460878372, "signal/advantage_pre_scale_std": 0.13522610068321228, "signal/advantage_std": 0.98283451795578, "signal/brier_reward/centered_abs_mean": 0.10873527824878693, "signal/brier_reward/group_std_mean": 0.13798998296260834, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17993208169937133, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010873528011143208, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02466178871691227, "signal/confidence_uniqueness_reward/group_std_mean": 0.03505043126642704, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04082990363240242, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024661787785589693, "signal/format_reward/centered_abs_mean": 0.01201714426279068, "signal/format_reward/group_std_mean": 0.019468109123408794, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0995325818657875, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00600857213139534, "signal/frontier_coverage_0/centered_abs_mean": 0.14618025720119476, "signal/frontier_coverage_0/group_std_mean": 0.1870233803987503, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034592658281326294, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020903776632621883, "signal/frontier_coverage_1/centered_abs_mean": 0.14618025720119476, "signal/frontier_coverage_1/group_std_mean": 0.1870233803987503, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034592658281326294, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020903776632621883, "signal/frontier_coverage_10/centered_abs_mean": 0.05835925862193107, "signal/frontier_coverage_10/group_std_mean": 0.0733158677816391, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013812579214572906, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008345374371856451, "signal/frontier_coverage_15/centered_abs_mean": 0.07714459002017975, "signal/frontier_coverage_15/group_std_mean": 0.09536905735731124, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018274228647351266, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011031676083803176, "signal/frontier_coverage_20/centered_abs_mean": 0.10645209103822709, "signal/frontier_coverage_20/group_std_mean": 0.13283228129148483, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025221217051148416, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015222648857161404, "signal/frontier_coverage_25/centered_abs_mean": 0.14250740706920623, "signal/frontier_coverage_25/group_std_mean": 0.17897864580154418, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03375965058803558, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020378559362143277, "signal/frontier_coverage_5/centered_abs_mean": 0.1453123450279236, "signal/frontier_coverage_5/group_std_mean": 0.18592797815799714, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03438692018389702, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00207796657923609, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3373031973838806, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40034814476966857, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.55850750207901, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033730319887399676, "step": 770 }, { "calibration/aurc": 0.08260132080338653, "calibration/batch_distribution_entropy": 0.9475478623002089, "calibration/buffer_distribution_entropy": 0.982594037282311, "calibration/confidence_entropy": 0.48343041044238033, "calibration/coverage@0%": 0.1554240440457745, "calibration/coverage@1%": 0.2635180388238424, "calibration/coverage@10%": 0.722244240603098, "calibration/coverage@15%": 0.7966044837091772, "calibration/coverage@20%": 0.8446356305902141, "calibration/coverage@25%": 0.8989537070808928, "calibration/coverage@30%": 0.9299806189076503, "calibration/coverage@5%": 0.6204585371542952, "calibration/ece": 0.22259992062477957, "calibration/mean_confidence": 0.5704307778797377, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007899305555555559, "completions/max_length": 4067.8, "completions/max_terminated_length": 4067.8, "completions/mean_length": 1538.70595703125, "completions/mean_terminated_length": 1550.9579833984376, "completions/min_length": 0.0, "completions/min_terminated_length": 450.6, "epoch": 1.8615892301346233, "grad_norm": 0.0024537527933716774, "learning_rate": 1.592548076923077e-06, "loss": -0.0182, "num_tokens": 2109139444.0, "reward": 1.02263902425766, "reward_std": 0.10763338208198547, "rewards/accuracy_reward": 0.7459201335906982, "rewards/brier_reward": 0.813153886795044, "rewards/confidence_uniqueness_reward": 0.9420896649360657, "rewards/format_reward": 0.9921006917953491, "rewards/frontier_coverage_0": -0.008578380825929344, "rewards/frontier_coverage_1": -0.008578380825929344, "rewards/frontier_coverage_10": 0.047513436526060104, "rewards/frontier_coverage_15": 0.1067799985408783, "rewards/frontier_coverage_20": 0.18154163658618927, "rewards/frontier_coverage_25": 0.26863664388656616, "rewards/frontier_coverage_5": -0.007999213365837931, "rewards/frontier_entropy_batch_reward": -0.3018000781536102, "signal/accuracy_reward/centered_abs_mean": 0.12080620676279068, "signal/accuracy_reward/group_std_mean": 0.15733032971620559, "signal/accuracy_reward/group_zero_std_frac": 0.5583333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0052472591400146, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06040310338139534, "signal/advantage_abs_mean": 0.767851459980011, "signal/advantage_pre_scale_abs_mean": 0.08170493394136429, "signal/advantage_pre_scale_std": 0.1378851354122162, "signal/advantage_std": 0.9828094124794007, "signal/brier_reward/centered_abs_mean": 0.11120356619358063, "signal/brier_reward/group_std_mean": 0.14290755689144136, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1852889508008957, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011120356805622577, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02482622042298317, "signal/confidence_uniqueness_reward/group_std_mean": 0.03989123106002808, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.041999526694417, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024826219072565437, "signal/format_reward/centered_abs_mean": 0.013058810774236918, "signal/format_reward/group_std_mean": 0.025868096575140952, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11041791215538979, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006529405387118459, "signal/frontier_coverage_0/centered_abs_mean": 0.15929721891880036, "signal/frontier_coverage_0/group_std_mean": 0.20568397343158723, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037854181975126265, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022779503371566532, "signal/frontier_coverage_1/centered_abs_mean": 0.15929721891880036, "signal/frontier_coverage_1/group_std_mean": 0.20568397343158723, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037854181975126265, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022779503371566532, "signal/frontier_coverage_10/centered_abs_mean": 0.06027785316109657, "signal/frontier_coverage_10/group_std_mean": 0.07613334357738495, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01442383099347353, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008619732921943069, "signal/frontier_coverage_15/centered_abs_mean": 0.07339970767498016, "signal/frontier_coverage_15/group_std_mean": 0.09090597331523895, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017760027572512625, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010496158269234003, "signal/frontier_coverage_20/centered_abs_mean": 0.1007804036140442, "signal/frontier_coverage_20/group_std_mean": 0.12484865486621857, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024397116526961325, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014411597978323698, "signal/frontier_coverage_25/centered_abs_mean": 0.13568875044584275, "signal/frontier_coverage_25/group_std_mean": 0.16859400570392608, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03280252404510975, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.001940349186770618, "signal/frontier_coverage_5/centered_abs_mean": 0.15811103582382202, "signal/frontier_coverage_5/group_std_mean": 0.20422449707984924, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0375734880566597, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002260987856425345, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31984294652938844, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3863619029521942, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5378746867179871, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0319842953234911, "step": 775 }, { "calibration/aurc": 0.16485442668940056, "calibration/batch_distribution_entropy": 0.9528827240900295, "calibration/buffer_distribution_entropy": 0.9814258866145845, "calibration/confidence_entropy": 0.47629329018267175, "calibration/coverage@0%": 0.017224309267334672, "calibration/coverage@1%": 0.017224309267334672, "calibration/coverage@10%": 0.278715365113757, "calibration/coverage@15%": 0.4485615693022387, "calibration/coverage@20%": 0.7906145837605202, "calibration/coverage@25%": 0.8919764412145321, "calibration/coverage@30%": 0.9786417536988686, "calibration/coverage@5%": 0.022432642600668008, "calibration/ece": 0.14761790194580762, "calibration/mean_confidence": 0.6011017510716694, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005902777777777768, "completions/max_length": 4064.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 1609.5780517578125, "completions/mean_terminated_length": 1619.094287109375, "completions/min_length": 0.0, "completions/min_terminated_length": 517.0, "epoch": 1.8735890801364983, "grad_norm": 0.0023489040322601795, "learning_rate": 1.5625e-06, "loss": -0.0162, "num_tokens": 2130754455.0, "reward": 1.02072274684906, "reward_std": 0.10271647274494171, "rewards/accuracy_reward": 0.7302083373069763, "rewards/brier_reward": 0.8394347190856933, "rewards/confidence_uniqueness_reward": 0.9440023183822632, "rewards/format_reward": 0.9940972328186035, "rewards/frontier_coverage_0": 0.023285062378272416, "rewards/frontier_coverage_1": 0.023285062378272416, "rewards/frontier_coverage_10": 0.05837507769465446, "rewards/frontier_coverage_15": 0.11961217522621155, "rewards/frontier_coverage_20": 0.1975026994943619, "rewards/frontier_coverage_25": 0.2890557885169983, "rewards/frontier_coverage_5": 0.023518830770626664, "rewards/frontier_entropy_batch_reward": -0.3027906119823456, "signal/accuracy_reward/centered_abs_mean": 0.10797525942325592, "signal/accuracy_reward/group_std_mean": 0.1463481605052948, "signal/accuracy_reward/group_zero_std_frac": 0.5666666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.917884886264801, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05398762971162796, "signal/advantage_abs_mean": 0.7607001185417175, "signal/advantage_pre_scale_abs_mean": 0.07600368112325669, "signal/advantage_pre_scale_std": 0.13057213723659516, "signal/advantage_std": 0.9827877879142761, "signal/brier_reward/centered_abs_mean": 0.10208135396242142, "signal/brier_reward/group_std_mean": 0.1331118553876877, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17384454905986785, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010208135098218917, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02246842011809349, "signal/confidence_uniqueness_reward/group_std_mean": 0.035991473495960234, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03830303549766541, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002246842044405639, "signal/format_reward/centered_abs_mean": 0.010036892350763082, "signal/format_reward/group_std_mean": 0.02093004286289215, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08556498661637306, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005018446175381541, "signal/frontier_coverage_0/centered_abs_mean": 0.1399555742740631, "signal/frontier_coverage_0/group_std_mean": 0.18119101524353026, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03407430574297905, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020013647386804222, "signal/frontier_coverage_1/centered_abs_mean": 0.1399555742740631, "signal/frontier_coverage_1/group_std_mean": 0.18119101524353026, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03407430574297905, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020013647386804222, "signal/frontier_coverage_10/centered_abs_mean": 0.0573968268930912, "signal/frontier_coverage_10/group_std_mean": 0.0724210187792778, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013992871344089507, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00082077463157475, "signal/frontier_coverage_15/centered_abs_mean": 0.07783315032720566, "signal/frontier_coverage_15/group_std_mean": 0.096867735683918, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018964045867323875, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011130140395835043, "signal/frontier_coverage_20/centered_abs_mean": 0.10844850391149521, "signal/frontier_coverage_20/group_std_mean": 0.1354563981294632, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026414349675178528, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015508136246353387, "signal/frontier_coverage_25/centered_abs_mean": 0.14559023976325988, "signal/frontier_coverage_25/group_std_mean": 0.1825695514678955, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03545608147978783, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020819404162466526, "signal/frontier_coverage_5/centered_abs_mean": 0.1386626899242401, "signal/frontier_coverage_5/group_std_mean": 0.17956798374652863, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033758468180894854, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019828763790428637, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32271628379821776, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3866087257862091, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5493596196174622, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032271627336740494, "step": 780 }, { "calibration/aurc": 0.1465034067905237, "calibration/batch_distribution_entropy": 0.9576631905455045, "calibration/buffer_distribution_entropy": 0.9795268357527442, "calibration/confidence_entropy": 0.4937558184431149, "calibration/coverage@0%": 0.08272828751193013, "calibration/coverage@1%": 0.08272828751193013, "calibration/coverage@10%": 0.38023853512743166, "calibration/coverage@15%": 0.5750353464069988, "calibration/coverage@20%": 0.6699505205242648, "calibration/coverage@25%": 0.8307626851522694, "calibration/coverage@30%": 0.9249811051105523, "calibration/coverage@5%": 0.2943209485996614, "calibration/ece": 0.16532390242811154, "calibration/mean_confidence": 0.6030139186483918, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010243055555555557, "completions/max_length": 4063.2, "completions/max_terminated_length": 4063.2, "completions/mean_length": 1720.489404296875, "completions/mean_terminated_length": 1738.4248291015624, "completions/min_length": 0.0, "completions/min_terminated_length": 532.2, "epoch": 1.8855889301383733, "grad_norm": 0.0023424793034791946, "learning_rate": 1.5324519230769232e-06, "loss": -0.0277, "num_tokens": 2153651869.0, "reward": 1.0122368812561036, "reward_std": 0.1105235531926155, "rewards/accuracy_reward": 0.7264757037162781, "rewards/brier_reward": 0.8275084495544434, "rewards/confidence_uniqueness_reward": 0.937448239326477, "rewards/format_reward": 0.9897569417953491, "rewards/frontier_coverage_0": 0.0164753757417202, "rewards/frontier_coverage_1": 0.0164753757417202, "rewards/frontier_coverage_10": 0.05674448758363724, "rewards/frontier_coverage_15": 0.12070612460374833, "rewards/frontier_coverage_20": 0.19932154715061187, "rewards/frontier_coverage_25": 0.2911352813243866, "rewards/frontier_coverage_5": 0.01682386063039303, "rewards/frontier_entropy_batch_reward": -0.32637971043586733, "signal/accuracy_reward/centered_abs_mean": 0.11615125834941864, "signal/accuracy_reward/group_std_mean": 0.15916134268045426, "signal/accuracy_reward/group_zero_std_frac": 0.5250000059604645, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.935201108455658, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05807562917470932, "signal/advantage_abs_mean": 0.7558478832244873, "signal/advantage_pre_scale_abs_mean": 0.08194233477115631, "signal/advantage_pre_scale_std": 0.14115980565547942, "signal/advantage_std": 0.9828650236129761, "signal/brier_reward/centered_abs_mean": 0.10771840810775757, "signal/brier_reward/group_std_mean": 0.14069525003433228, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1741844743490219, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010771840997040271, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02886640131473541, "signal/confidence_uniqueness_reward/group_std_mean": 0.04311688244342804, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04669438749551773, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002886640280485153, "signal/format_reward/centered_abs_mean": 0.01532118059694767, "signal/format_reward/group_std_mean": 0.0267114520072937, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12298648655414582, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007660590298473835, "signal/frontier_coverage_0/centered_abs_mean": 0.14083233028650283, "signal/frontier_coverage_0/group_std_mean": 0.18645595610141755, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03259415253996849, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002013902342878282, "signal/frontier_coverage_1/centered_abs_mean": 0.14083233028650283, "signal/frontier_coverage_1/group_std_mean": 0.18645595610141755, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03259415253996849, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002013902342878282, "signal/frontier_coverage_10/centered_abs_mean": 0.060231783986091615, "signal/frontier_coverage_10/group_std_mean": 0.07518986761569976, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013987057469785213, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008613145095296204, "signal/frontier_coverage_15/centered_abs_mean": 0.08069218844175338, "signal/frontier_coverage_15/group_std_mean": 0.09967537224292755, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01876749433577061, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011538982624188065, "signal/frontier_coverage_20/centered_abs_mean": 0.11142556518316268, "signal/frontier_coverage_20/group_std_mean": 0.13851696103811265, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02588377967476845, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015933856135234236, "signal/frontier_coverage_25/centered_abs_mean": 0.14959222674369813, "signal/frontier_coverage_25/group_std_mean": 0.18737058937549592, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03470103591680527, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021391689078882337, "signal/frontier_coverage_5/centered_abs_mean": 0.13975331783294678, "signal/frontier_coverage_5/group_std_mean": 0.18505517840385438, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032344093546271324, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001998472446575761, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3308142781257629, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39403237104415895, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5391006350517273, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03308142870664597, "step": 785 }, { "calibration/aurc": 0.1939321799769151, "calibration/batch_distribution_entropy": 0.9231152057043847, "calibration/buffer_distribution_entropy": 0.9792681924919983, "calibration/confidence_entropy": 0.4568647554553452, "calibration/coverage@0%": 0.14125862699222508, "calibration/coverage@1%": 0.1814675042768204, "calibration/coverage@10%": 0.28295346780291786, "calibration/coverage@15%": 0.3981430680416006, "calibration/coverage@20%": 0.7165041515188754, "calibration/coverage@25%": 0.7863090890430093, "calibration/coverage@30%": 0.8315137573932498, "calibration/coverage@5%": 0.20597930801920508, "calibration/ece": 0.12468192539573246, "calibration/mean_confidence": 0.5811314275559181, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013281250000000022, "completions/max_length": 4051.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 1706.2888427734374, "completions/mean_terminated_length": 1730.19765625, "completions/min_length": 0.0, "completions/min_terminated_length": 504.6, "epoch": 1.8975887801402482, "grad_norm": 0.0025702903512865305, "learning_rate": 1.5024038461538462e-06, "loss": -0.0296, "num_tokens": 2176396092.0, "reward": 0.9944682359695435, "reward_std": 0.12540389597415924, "rewards/accuracy_reward": 0.6935763835906983, "rewards/brier_reward": 0.8150904059410096, "rewards/confidence_uniqueness_reward": 0.9365580677986145, "rewards/format_reward": 0.98671875, "rewards/frontier_coverage_0": 0.02789465319365263, "rewards/frontier_coverage_1": 0.02789465319365263, "rewards/frontier_coverage_10": 0.05078333094716072, "rewards/frontier_coverage_15": 0.10327455699443817, "rewards/frontier_coverage_20": 0.17147523760795594, "rewards/frontier_coverage_25": 0.25191813707351685, "rewards/frontier_coverage_5": 0.027757696248590945, "rewards/frontier_entropy_batch_reward": -0.30296459794044495, "signal/accuracy_reward/centered_abs_mean": 0.1440972253680229, "signal/accuracy_reward/group_std_mean": 0.18764382898807525, "signal/accuracy_reward/group_zero_std_frac": 0.472222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0437634110450744, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07204861268401146, "signal/advantage_abs_mean": 0.7594870686531067, "signal/advantage_pre_scale_abs_mean": 0.0951348215341568, "signal/advantage_pre_scale_std": 0.15686687529087068, "signal/advantage_std": 0.9830270886421204, "signal/brier_reward/centered_abs_mean": 0.12329903990030289, "signal/brier_reward/group_std_mean": 0.15922830402851104, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17916857302188874, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012329904362559319, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03225949928164482, "signal/confidence_uniqueness_reward/group_std_mean": 0.050067192316055296, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04653703421354294, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003225949825718999, "signal/format_reward/centered_abs_mean": 0.020838758908212185, "signal/format_reward/group_std_mean": 0.0363934725522995, "signal/format_reward/group_zero_std_frac": 0.8555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.14907054007053375, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010419379454106092, "signal/frontier_coverage_0/centered_abs_mean": 0.16697318553924562, "signal/frontier_coverage_0/group_std_mean": 0.2151999741792679, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03466854318976402, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023877166211605074, "signal/frontier_coverage_1/centered_abs_mean": 0.16697318553924562, "signal/frontier_coverage_1/group_std_mean": 0.2151999741792679, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03466854318976402, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023877166211605074, "signal/frontier_coverage_10/centered_abs_mean": 0.06191762536764145, "signal/frontier_coverage_10/group_std_mean": 0.07758147418498992, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012927941419184208, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008854220737703145, "signal/frontier_coverage_15/centered_abs_mean": 0.0792486310005188, "signal/frontier_coverage_15/group_std_mean": 0.09801900982856751, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016577761620283127, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001133255404420197, "signal/frontier_coverage_20/centered_abs_mean": 0.11002731919288636, "signal/frontier_coverage_20/group_std_mean": 0.1369099199771881, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02301861494779587, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015733906766399741, "signal/frontier_coverage_25/centered_abs_mean": 0.1496077835559845, "signal/frontier_coverage_25/group_std_mean": 0.1872713029384613, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03128085993230343, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00213939119130373, "signal/frontier_coverage_5/centered_abs_mean": 0.16589560508728027, "signal/frontier_coverage_5/group_std_mean": 0.21386311054229737, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0344485942274332, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023723070975393058, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3218592584133148, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3871870756149292, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.46996867656707764, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03218592554330826, "step": 790 }, { "calibration/aurc": 0.17420068402511013, "calibration/batch_distribution_entropy": 0.936744799448956, "calibration/buffer_distribution_entropy": 0.9791764432986675, "calibration/confidence_entropy": 0.4700765218311792, "calibration/coverage@0%": 0.044497148525959325, "calibration/coverage@1%": 0.044497148525959325, "calibration/coverage@10%": 0.370861900013158, "calibration/coverage@15%": 0.48271767810026384, "calibration/coverage@20%": 0.5339723504837293, "calibration/coverage@25%": 0.7436015831134565, "calibration/coverage@30%": 0.8457893579595428, "calibration/coverage@5%": 0.21455714721016075, "calibration/ece": 0.13920112303625717, "calibration/mean_confidence": 0.6015508142066427, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00894097222222221, "completions/max_length": 4013.4, "completions/max_terminated_length": 4013.4, "completions/mean_length": 1564.388037109375, "completions/mean_terminated_length": 1578.45361328125, "completions/min_length": 0.0, "completions/min_terminated_length": 519.6, "epoch": 1.9095886301421232, "grad_norm": 0.002500551752746105, "learning_rate": 1.4723557692307693e-06, "loss": -0.026, "num_tokens": 2197491826.0, "reward": 0.9953348517417908, "reward_std": 0.1119878500699997, "rewards/accuracy_reward": 0.697743046283722, "rewards/brier_reward": 0.8237994909286499, "rewards/confidence_uniqueness_reward": 0.9378173589706421, "rewards/format_reward": 0.9910590291023255, "rewards/frontier_coverage_0": 0.03404067233204842, "rewards/frontier_coverage_1": 0.03404067233204842, "rewards/frontier_coverage_10": 0.051751085370779035, "rewards/frontier_coverage_15": 0.10530708134174346, "rewards/frontier_coverage_20": 0.17555021941661836, "rewards/frontier_coverage_25": 0.25901117622852327, "rewards/frontier_coverage_5": 0.03414784893393517, "rewards/frontier_entropy_batch_reward": -0.35149884819984434, "signal/accuracy_reward/centered_abs_mean": 0.1218315988779068, "signal/accuracy_reward/group_std_mean": 0.15863377153873442, "signal/accuracy_reward/group_zero_std_frac": 0.5583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9944993138313294, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0609157994389534, "signal/advantage_abs_mean": 0.7687033891677857, "signal/advantage_pre_scale_abs_mean": 0.0843727320432663, "signal/advantage_pre_scale_std": 0.1435972660779953, "signal/advantage_std": 0.9828492641448975, "signal/brier_reward/centered_abs_mean": 0.11527116298675537, "signal/brier_reward/group_std_mean": 0.14814480543136596, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1880294054746628, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01152711659669876, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02728550471365452, "signal/confidence_uniqueness_reward/group_std_mean": 0.04235233888030052, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04486031234264374, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002728550648316741, "signal/format_reward/centered_abs_mean": 0.01403537318110466, "signal/format_reward/group_std_mean": 0.02631957270205021, "signal/format_reward/group_zero_std_frac": 0.8916666626930236, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11467897593975067, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00701768659055233, "signal/frontier_coverage_0/centered_abs_mean": 0.15350857526063919, "signal/frontier_coverage_0/group_std_mean": 0.1942602276802063, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035571636632084846, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002195172756910324, "signal/frontier_coverage_1/centered_abs_mean": 0.15350857526063919, "signal/frontier_coverage_1/group_std_mean": 0.1942602276802063, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035571636632084846, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002195172756910324, "signal/frontier_coverage_10/centered_abs_mean": 0.06201315745711326, "signal/frontier_coverage_10/group_std_mean": 0.07682908922433854, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014492305181920528, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008867881610058248, "signal/frontier_coverage_15/centered_abs_mean": 0.08047257363796234, "signal/frontier_coverage_15/group_std_mean": 0.09959491640329361, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01899382472038269, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011507577961310743, "signal/frontier_coverage_20/centered_abs_mean": 0.11074172109365463, "signal/frontier_coverage_20/group_std_mean": 0.13823194950819015, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.026179977133870123, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001583606656640768, "signal/frontier_coverage_25/centered_abs_mean": 0.148504039645195, "signal/frontier_coverage_25/group_std_mean": 0.1865270584821701, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03508494608104229, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002123607788234949, "signal/frontier_coverage_5/centered_abs_mean": 0.15275688022375106, "signal/frontier_coverage_5/group_std_mean": 0.19333274960517882, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03539147637784481, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002184423431754112, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3300867795944214, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3969386100769043, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5421956241130829, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033008677139878276, "step": 795 }, { "calibration/aurc": 0.1441095627724894, "calibration/batch_distribution_entropy": 0.9483558736045365, "calibration/buffer_distribution_entropy": 0.9798358679245261, "calibration/confidence_entropy": 0.49228147452219206, "calibration/coverage@0%": 0.12621647987116916, "calibration/coverage@1%": 0.20550409929511188, "calibration/coverage@10%": 0.5594735753813581, "calibration/coverage@15%": 0.6802893555892628, "calibration/coverage@20%": 0.7370080672589355, "calibration/coverage@25%": 0.7793635170603675, "calibration/coverage@30%": 0.8233267716535433, "calibration/coverage@5%": 0.3705636289846919, "calibration/ece": 0.14491320597761728, "calibration/mean_confidence": 0.5801336887442534, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007291666666666652, "completions/max_length": 4023.6, "completions/max_terminated_length": 4023.6, "completions/mean_length": 1477.7800048828126, "completions/mean_terminated_length": 1488.652392578125, "completions/min_length": 0.0, "completions/min_terminated_length": 478.6, "epoch": 1.9215884801439982, "grad_norm": 0.002651046961545944, "learning_rate": 1.4423076923076922e-06, "loss": -0.0157, "num_tokens": 2217642443.0, "reward": 1.0029356360435486, "reward_std": 0.12168239057064056, "rewards/accuracy_reward": 0.70390625, "rewards/brier_reward": 0.829195499420166, "rewards/confidence_uniqueness_reward": 0.9415722489356995, "rewards/format_reward": 0.9927083253860474, "rewards/frontier_coverage_0": 0.03227963969111443, "rewards/frontier_coverage_1": 0.03227963969111443, "rewards/frontier_coverage_10": 0.050942166894674304, "rewards/frontier_coverage_15": 0.10139497518539428, "rewards/frontier_coverage_20": 0.17052144110202788, "rewards/frontier_coverage_25": 0.2535334646701813, "rewards/frontier_coverage_5": 0.03230700695421547, "rewards/frontier_entropy_batch_reward": -0.32076034545898435, "signal/accuracy_reward/centered_abs_mean": 0.13656141459941865, "signal/accuracy_reward/group_std_mean": 0.18021790385246278, "signal/accuracy_reward/group_zero_std_frac": 0.4888889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0031284928321837, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06828070729970932, "signal/advantage_abs_mean": 0.760795509815216, "signal/advantage_pre_scale_abs_mean": 0.09117430299520493, "signal/advantage_pre_scale_std": 0.1516057848930359, "signal/advantage_std": 0.9829917192459107, "signal/brier_reward/centered_abs_mean": 0.10941434502601624, "signal/brier_reward/group_std_mean": 0.14455374777317048, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16310821771621703, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010941434279084205, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024841461703181265, "signal/confidence_uniqueness_reward/group_std_mean": 0.03970448262989521, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03761226050555706, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024841462261974813, "signal/format_reward/centered_abs_mean": 0.012597656343132257, "signal/format_reward/group_std_mean": 0.024988747760653497, "signal/format_reward/group_zero_std_frac": 0.8916666746139527, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09697704315185547, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006298828171566129, "signal/frontier_coverage_0/centered_abs_mean": 0.1431431382894516, "signal/frontier_coverage_0/group_std_mean": 0.18829217851161956, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03046076148748398, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002046946971677244, "signal/frontier_coverage_1/centered_abs_mean": 0.1431431382894516, "signal/frontier_coverage_1/group_std_mean": 0.18829217851161956, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03046076148748398, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002046946971677244, "signal/frontier_coverage_10/centered_abs_mean": 0.05590105578303337, "signal/frontier_coverage_10/group_std_mean": 0.071537347137928, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.011949419602751732, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0007993850973434746, "signal/frontier_coverage_15/centered_abs_mean": 0.07858142256736755, "signal/frontier_coverage_15/group_std_mean": 0.09816959351301194, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.016770840622484684, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001123714353889227, "signal/frontier_coverage_20/centered_abs_mean": 0.1134074330329895, "signal/frontier_coverage_20/group_std_mean": 0.14195845723152162, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024147434905171395, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016217263182625175, "signal/frontier_coverage_25/centered_abs_mean": 0.1560976982116699, "signal/frontier_coverage_25/group_std_mean": 0.19557462632656097, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0331905759871006, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002232197020202875, "signal/frontier_coverage_5/centered_abs_mean": 0.14286354184150696, "signal/frontier_coverage_5/group_std_mean": 0.18793686628341674, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030401355773210525, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002042948640882969, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33393247723579406, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3993107795715332, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4993874430656433, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033393248543143274, "step": 800 }, { "epoch": 1.9215884801439982, "eval_calibration/aurc": 0.1455259216944456, "eval_calibration/batch_distribution_entropy": 0.8971357413105366, "eval_calibration/buffer_distribution_entropy": 0.980327809840766, "eval_calibration/confidence_entropy": 0.5021766405484772, "eval_calibration/coverage@0%": 0.2746975806451613, "eval_calibration/coverage@1%": 0.2746975806451613, "eval_calibration/coverage@10%": 0.4625336021505377, "eval_calibration/coverage@15%": 0.494119623655914, "eval_calibration/coverage@20%": 0.8328293010752689, "eval_calibration/coverage@25%": 0.9322916666666666, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.4102822580645162, "eval_calibration/ece": 0.24091246639784947, "eval_calibration/mean_confidence": 0.5817957661290323, "eval_completions/clipped_ratio": 0.005208333333333352, "eval_completions/max_length": 3359.1666666666665, "eval_completions/max_terminated_length": 3359.1666666666665, "eval_completions/mean_length": 1410.9085286458333, "eval_completions/mean_terminated_length": 1418.3665771484375, "eval_completions/min_length": 194.16666666666666, "eval_completions/min_terminated_length": 507.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 2217642443.0, "eval_reward": 0.920647660891215, "eval_reward_std": 0.24002850552399954, "eval_rewards/accuracy_reward": 0.6875, "eval_rewards/brier_reward": 0.8208476801713308, "eval_rewards/confidence_uniqueness_reward": 0.8881562054157257, "eval_rewards/format_reward": 0.9921875099341074, "eval_rewards/frontier_coverage_0": 0.03746247625288864, "eval_rewards/frontier_coverage_1": 0.03746247625288864, "eval_rewards/frontier_coverage_10": 0.04715126069883505, "eval_rewards/frontier_coverage_15": 0.09141718472043674, "eval_rewards/frontier_coverage_20": 0.1547790989279747, "eval_rewards/frontier_coverage_25": 0.23214666545391083, "eval_rewards/frontier_coverage_5": 0.037501002584273614, "eval_rewards/frontier_entropy_batch_reward": -0.9921875099341074, "eval_runtime": 219.7567, "eval_samples_per_second": 4.55, "eval_signal/accuracy_reward/centered_abs_mean": 0.4129774272441864, "eval_signal/accuracy_reward/group_std_mean": 0.46059202154477435, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8701687455177307, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2064887136220932, "eval_signal/advantage_abs_mean": 0.8672041694323221, "eval_signal/advantage_pre_scale_abs_mean": 0.20854839434226355, "eval_signal/advantage_pre_scale_std": 0.23830395440260568, "eval_signal/advantage_std": 0.9864075283209482, "eval_signal/brier_reward/centered_abs_mean": 0.16755660126606622, "eval_signal/brier_reward/group_std_mean": 0.22830546647310257, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07057205463449161, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.016755660995841026, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.0519091517974933, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07648126035928726, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021853001477817696, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005190914962440729, "eval_signal/format_reward/centered_abs_mean": 0.014919704912851254, "eval_signal/format_reward/group_std_mean": 0.038215355637172856, "eval_signal/format_reward/group_zero_std_frac": 0.8055555820465088, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.030461806803941727, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.007459852456425627, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.26797403395175934, "eval_signal/frontier_coverage_0/group_std_mean": 0.37435539563496906, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016153021560360987, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038320288294926286, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.26797403395175934, "eval_signal/frontier_coverage_1/group_std_mean": 0.37435539563496906, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016153021560360987, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038320288294926286, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.07443711161613464, "eval_signal/frontier_coverage_10/group_std_mean": 0.09980045631527901, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004489072676127155, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010644506934719782, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.10701891779899597, "eval_signal/frontier_coverage_15/group_std_mean": 0.13961977263291678, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00645672227256, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015303705004043877, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.18605641275644302, "eval_signal/frontier_coverage_20/group_std_mean": 0.23238414277633032, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.011226917617022991, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0026606065997232995, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.2779003183046977, "eval_signal/frontier_coverage_25/group_std_mean": 0.341300701101621, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01676830028494199, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003973974303031961, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2673551340897878, "eval_signal/frontier_coverage_5/group_std_mean": 0.3735866844654083, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016115605210264523, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003823178354650736, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.014919704912851254, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.038215355637172856, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8055555820465088, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.00609236132974426, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0014919705766563613, "eval_steps_per_second": 0.027, "step": 800 }, { "epoch": 1.9215884801439982, "step": 800, "train_probe_calibration/aurc": 0.18083066108505683, "train_probe_calibration/batch_distribution_entropy": 0.9208006951468654, "train_probe_calibration/buffer_distribution_entropy": 0.9802754681502569, "train_probe_calibration/confidence_entropy": 0.4683939042130995, "train_probe_calibration/coverage@0%": 0.203125, "train_probe_calibration/coverage@1%": 0.203125, "train_probe_calibration/coverage@10%": 0.421875, "train_probe_calibration/coverage@15%": 0.609375, "train_probe_calibration/coverage@20%": 0.8125, "train_probe_calibration/coverage@25%": 0.90625, "train_probe_calibration/coverage@30%": 0.953125, "train_probe_calibration/coverage@5%": 0.22395833333333334, "train_probe_calibration/ece": 0.23921666666666672, "train_probe_calibration/mean_confidence": 0.6079770833333333, "train_probe_completions/clipped_ratio": 0.006770833333333337, "train_probe_completions/max_length": 3489.3333333333335, "train_probe_completions/max_terminated_length": 3489.3333333333335, "train_probe_completions/mean_length": 1386.4324747721355, "train_probe_completions/mean_terminated_length": 1395.5091552734375, "train_probe_completions/min_length": 247.16666666666666, "train_probe_completions/min_terminated_length": 441.6666666666667, "train_probe_loss": 0.0, "train_probe_num_tokens": 2217642443.0, "train_probe_reward": 0.9529338677724203, "train_probe_reward_std": 0.22208184003829956, "train_probe_rewards/accuracy_reward": 0.7491319477558136, "train_probe_rewards/brier_reward": 0.8249579966068268, "train_probe_rewards/confidence_uniqueness_reward": 0.8884093761444092, "train_probe_rewards/format_reward": 0.995659718910853, "train_probe_rewards/frontier_coverage_0": 0.0003041389087835948, "train_probe_rewards/frontier_coverage_1": 0.0003041389087835948, "train_probe_rewards/frontier_coverage_10": 0.046201564371585846, "train_probe_rewards/frontier_coverage_15": 0.10657777761419614, "train_probe_rewards/frontier_coverage_20": 0.18386120597521463, "train_probe_rewards/frontier_coverage_25": 0.27541854977607727, "train_probe_rewards/frontier_coverage_5": 0.00042533256297853467, "train_probe_rewards/frontier_entropy_batch_reward": -0.995659718910853, "train_probe_runtime": 199.9791, "train_probe_samples_per_second": 5.001, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3640950520833333, "train_probe_signal/accuracy_reward/group_std_mean": 0.4310727119445801, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8359168668588003, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18204752604166666, "train_probe_signal/advantage_abs_mean": 0.8151635825634003, "train_probe_signal/advantage_pre_scale_abs_mean": 0.18291218082110086, "train_probe_signal/advantage_pre_scale_std": 0.22114630540211996, "train_probe_signal/advantage_std": 0.9863705039024353, "train_probe_signal/brier_reward/centered_abs_mean": 0.169136772553126, "train_probe_signal/brier_reward/group_std_mean": 0.23165656626224518, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07761403918266296, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.01691367772097389, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.049099608014027275, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.07007196421424548, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02254458951453368, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004909960941101114, "train_probe_signal/format_reward/centered_abs_mean": 0.008409287935743729, "train_probe_signal/format_reward/group_std_mean": 0.02455231888840596, "train_probe_signal/format_reward/group_zero_std_frac": 0.8611111243565878, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.018713080634673435, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004204643967871864, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.24267660826444626, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.362765575448672, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015939356448749702, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003470275589885811, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.24267660826444626, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.362765575448672, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015939356448749702, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003470275589885811, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.0767682616909345, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.10428255423903465, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005042990514387687, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00109778616266946, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.11625955998897552, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.14683445791403452, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0076437525761624174, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0016625117083700995, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.19440337270498276, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.2381580794850985, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012779997972150644, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027799681605150304, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.28210918108622235, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.343789463241895, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018544109848638374, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004034161296052237, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.24214978516101837, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3620627323786418, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015904737481226523, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0034627420051644244, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008409287935743729, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.02455231888840596, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8611111243565878, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.003742616313199202, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008409288323794802, "train_probe_steps_per_second": 0.03 }, { "calibration/aurc": 0.08876575446621993, "calibration/batch_distribution_entropy": 0.9423436960308473, "calibration/buffer_distribution_entropy": 0.9805957757457785, "calibration/confidence_entropy": 0.4807342807779079, "calibration/coverage@0%": 0.11765208751376892, "calibration/coverage@1%": 0.2897543936392867, "calibration/coverage@10%": 0.6165106442373482, "calibration/coverage@15%": 0.8177444677953274, "calibration/coverage@20%": 0.8942868407742388, "calibration/coverage@25%": 0.9299153828277719, "calibration/coverage@30%": 0.9597159378289339, "calibration/coverage@5%": 0.48157154399538965, "calibration/ece": 0.18224708996069522, "calibration/mean_confidence": 0.6022935318017679, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004513888888888862, "completions/max_length": 4028.4, "completions/max_terminated_length": 4028.4, "completions/mean_length": 1402.483349609375, "completions/mean_terminated_length": 1408.866552734375, "completions/min_length": 0.0, "completions/min_terminated_length": 425.0, "epoch": 1.9335883301458732, "grad_norm": 0.002710554050281644, "learning_rate": 1.4122596153846154e-06, "loss": -0.0106, "num_tokens": 2236900619.0, "reward": 1.0090242743492126, "reward_std": 0.10053354352712632, "rewards/accuracy_reward": 0.7064236044883728, "rewards/brier_reward": 0.8399913668632507, "rewards/confidence_uniqueness_reward": 0.9446008086204529, "rewards/format_reward": 0.9954861164093017, "rewards/frontier_coverage_0": 0.04834661977365613, "rewards/frontier_coverage_1": 0.04834661977365613, "rewards/frontier_coverage_10": 0.056374243646860125, "rewards/frontier_coverage_15": 0.1052817702293396, "rewards/frontier_coverage_20": 0.1751980185508728, "rewards/frontier_coverage_25": 0.2610403925180435, "rewards/frontier_coverage_5": 0.04840220175683498, "rewards/frontier_entropy_batch_reward": -0.31014604568481446, "signal/accuracy_reward/centered_abs_mean": 0.10692274272441864, "signal/accuracy_reward/group_std_mean": 0.14614808857440947, "signal/accuracy_reward/group_zero_std_frac": 0.5555555641651153, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9182430386543274, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05346137136220932, "signal/advantage_abs_mean": 0.7531801104545593, "signal/advantage_pre_scale_abs_mean": 0.07416855543851852, "signal/advantage_pre_scale_std": 0.12756477743387223, "signal/advantage_std": 0.9827540397644043, "signal/brier_reward/centered_abs_mean": 0.10358149409294129, "signal/brier_reward/group_std_mean": 0.13555350452661513, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1797472804784775, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01035814955830574, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02066163383424282, "signal/confidence_uniqueness_reward/group_std_mean": 0.033146093413233754, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0357735026627779, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020661634393036366, "signal/format_reward/centered_abs_mean": 0.008192274253815413, "signal/format_reward/group_std_mean": 0.017970719560980796, "signal/format_reward/group_zero_std_frac": 0.9166666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06989422589540481, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004096137126907706, "signal/frontier_coverage_0/centered_abs_mean": 0.14836236834526062, "signal/frontier_coverage_0/group_std_mean": 0.19158115983009338, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036716148257255554, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021215818589553235, "signal/frontier_coverage_1/centered_abs_mean": 0.14836236834526062, "signal/frontier_coverage_1/group_std_mean": 0.19158115983009338, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036716148257255554, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021215818589553235, "signal/frontier_coverage_10/centered_abs_mean": 0.05951479524374008, "signal/frontier_coverage_10/group_std_mean": 0.07481328845024109, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014785249903798103, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008510615676641464, "signal/frontier_coverage_15/centered_abs_mean": 0.07461834698915482, "signal/frontier_coverage_15/group_std_mean": 0.09241391271352768, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01858535371720791, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010670423740521072, "signal/frontier_coverage_20/centered_abs_mean": 0.10110130459070206, "signal/frontier_coverage_20/group_std_mean": 0.12569495439529418, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025172940641641616, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014457486337050795, "signal/frontier_coverage_25/centered_abs_mean": 0.13566771894693375, "signal/frontier_coverage_25/group_std_mean": 0.16943861842155455, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.033737773075699806, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019400483928620816, "signal/frontier_coverage_5/centered_abs_mean": 0.14806557297706605, "signal/frontier_coverage_5/group_std_mean": 0.19121136963367463, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036642659455537796, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021173376822844147, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3130863606929779, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3798399746417999, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5450647294521331, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03130863644182682, "step": 805 }, { "calibration/aurc": 0.1471249527656881, "calibration/batch_distribution_entropy": 0.9672537796661741, "calibration/buffer_distribution_entropy": 0.979552963929845, "calibration/confidence_entropy": 0.49464106724349055, "calibration/coverage@0%": 0.029417291554110984, "calibration/coverage@1%": 0.06362781786990046, "calibration/coverage@10%": 0.36135376187304225, "calibration/coverage@15%": 0.5955494493668724, "calibration/coverage@20%": 0.7921494204888049, "calibration/coverage@25%": 0.8716848264548156, "calibration/coverage@30%": 0.9671018276762402, "calibration/coverage@5%": 0.10620979637637609, "calibration/ece": 0.16179601206450783, "calibration/mean_confidence": 0.5837775350143656, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005034722222222232, "completions/max_length": 4013.2, "completions/max_terminated_length": 4013.2, "completions/mean_length": 1473.1595703125, "completions/mean_terminated_length": 1480.6045166015624, "completions/min_length": 0.0, "completions/min_terminated_length": 476.6, "epoch": 1.9455881801477481, "grad_norm": 0.0026609154883772135, "learning_rate": 1.3822115384615387e-06, "loss": -0.0142, "num_tokens": 2256970521.0, "reward": 1.0169323682785034, "reward_std": 0.11198469400405883, "rewards/accuracy_reward": 0.7326388955116272, "rewards/brier_reward": 0.8031673669815064, "rewards/confidence_uniqueness_reward": 0.9455313444137573, "rewards/format_reward": 0.9949652791023255, "rewards/frontier_coverage_0": -0.009512295946478844, "rewards/frontier_coverage_1": -0.009512295946478844, "rewards/frontier_coverage_10": 0.03974997103214264, "rewards/frontier_coverage_15": 0.09132075309753418, "rewards/frontier_coverage_20": 0.15879679769277572, "rewards/frontier_coverage_25": 0.2403422027826309, "rewards/frontier_coverage_5": -0.009342345595359802, "rewards/frontier_entropy_batch_reward": -0.28915963172912595, "signal/accuracy_reward/centered_abs_mean": 0.1264214411377907, "signal/accuracy_reward/group_std_mean": 0.17384180426597595, "signal/accuracy_reward/group_zero_std_frac": 0.4777777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9332287669181824, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06321072056889535, "signal/advantage_abs_mean": 0.7431563854217529, "signal/advantage_pre_scale_abs_mean": 0.08071474879980087, "signal/advantage_pre_scale_std": 0.13466430604457855, "signal/advantage_std": 0.983002245426178, "signal/brier_reward/centered_abs_mean": 0.11788292080163956, "signal/brier_reward/group_std_mean": 0.15307863652706147, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17422791421413422, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011788292042911052, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021130212768912315, "signal/confidence_uniqueness_reward/group_std_mean": 0.03701507076621056, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03132249191403389, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00211302125826478, "signal/format_reward/centered_abs_mean": 0.009461805690079927, "signal/format_reward/group_std_mean": 0.02294406220316887, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0699712760746479, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004730902845039964, "signal/frontier_coverage_0/centered_abs_mean": 0.17159743010997772, "signal/frontier_coverage_0/group_std_mean": 0.2192448854446411, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036260566860437396, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0024538431782275437, "signal/frontier_coverage_1/centered_abs_mean": 0.17159743010997772, "signal/frontier_coverage_1/group_std_mean": 0.2192448854446411, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036260566860437396, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0024538431782275437, "signal/frontier_coverage_10/centered_abs_mean": 0.06220594048500061, "signal/frontier_coverage_10/group_std_mean": 0.07851073145866394, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013184322603046894, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008895449223928154, "signal/frontier_coverage_15/centered_abs_mean": 0.07340938150882721, "signal/frontier_coverage_15/group_std_mean": 0.0915742427110672, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015591609664261341, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010497541399672628, "signal/frontier_coverage_20/centered_abs_mean": 0.10003067702054977, "signal/frontier_coverage_20/group_std_mean": 0.12545598596334456, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02125253602862358, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014304386451840401, "signal/frontier_coverage_25/centered_abs_mean": 0.1350281298160553, "signal/frontier_coverage_25/group_std_mean": 0.17083185017108918, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02868236191570759, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019309022929519415, "signal/frontier_coverage_5/centered_abs_mean": 0.17116305530071257, "signal/frontier_coverage_5/group_std_mean": 0.21869678497314454, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03616959452629089, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0024476317223161457, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31781532168388366, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38577706813812257, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4724361836910248, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0317815326154232, "step": 810 }, { "calibration/aurc": 0.07738243799150339, "calibration/batch_distribution_entropy": 0.9603774756717269, "calibration/buffer_distribution_entropy": 0.9790119971436921, "calibration/confidence_entropy": 0.47555495386025026, "calibration/coverage@0%": 0.0898050077121467, "calibration/coverage@1%": 0.24541858473564537, "calibration/coverage@10%": 0.6958624603570598, "calibration/coverage@15%": 0.8912864988448866, "calibration/coverage@20%": 0.9743838256804231, "calibration/coverage@25%": 0.9994778067885118, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5200500577556628, "calibration/ece": 0.21906389852930958, "calibration/mean_confidence": 0.595564159124398, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00425347222222221, "completions/max_length": 3920.6, "completions/max_terminated_length": 3920.6, "completions/mean_length": 1437.7090576171875, "completions/mean_terminated_length": 1443.9518798828126, "completions/min_length": 0.0, "completions/min_terminated_length": 421.2, "epoch": 1.9575880301496231, "grad_norm": 0.0027806926518678665, "learning_rate": 1.3521634615384617e-06, "loss": -0.0134, "num_tokens": 2276629761.0, "reward": 1.0065173625946044, "reward_std": 0.10549866706132889, "rewards/accuracy_reward": 0.7015625, "rewards/brier_reward": 0.809320867061615, "rewards/confidence_uniqueness_reward": 0.9481490731239319, "rewards/format_reward": 0.9957465171813965, "rewards/frontier_coverage_0": 0.019368353858590127, "rewards/frontier_coverage_1": 0.019368353858590127, "rewards/frontier_coverage_10": 0.04486367180943489, "rewards/frontier_coverage_15": 0.08716509938240051, "rewards/frontier_coverage_20": 0.1480014741420746, "rewards/frontier_coverage_25": 0.2240679919719696, "rewards/frontier_coverage_5": 0.019489490240812302, "rewards/frontier_entropy_batch_reward": -0.2592541307210922, "signal/accuracy_reward/centered_abs_mean": 0.11748046725988388, "signal/accuracy_reward/group_std_mean": 0.1602412313222885, "signal/accuracy_reward/group_zero_std_frac": 0.5305555641651154, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9277384519577027, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05874023362994194, "signal/advantage_abs_mean": 0.7442155957221985, "signal/advantage_pre_scale_abs_mean": 0.07687741965055465, "signal/advantage_pre_scale_std": 0.12851224541664125, "signal/advantage_std": 0.982895040512085, "signal/brier_reward/centered_abs_mean": 0.10857034474611282, "signal/brier_reward/group_std_mean": 0.1429567039012909, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17236358523368836, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010857034847140313, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01928409282118082, "signal/confidence_uniqueness_reward/group_std_mean": 0.03310770466923714, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030435840785503387, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019284092588350177, "signal/format_reward/centered_abs_mean": 0.0077636716421693565, "signal/format_reward/group_std_mean": 0.0192112909629941, "signal/format_reward/group_zero_std_frac": 0.9, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06007810868322849, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0038818358210846783, "signal/frontier_coverage_0/centered_abs_mean": 0.16339569687843322, "signal/frontier_coverage_0/group_std_mean": 0.21440580487251282, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03715735524892807, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023365584667772053, "signal/frontier_coverage_1/centered_abs_mean": 0.16339569687843322, "signal/frontier_coverage_1/group_std_mean": 0.21440580487251282, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03715735524892807, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023365584667772053, "signal/frontier_coverage_10/centered_abs_mean": 0.058762216567993165, "signal/frontier_coverage_10/group_std_mean": 0.07503211051225663, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013392175361514092, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008402997045777738, "signal/frontier_coverage_15/centered_abs_mean": 0.06615000814199448, "signal/frontier_coverage_15/group_std_mean": 0.0824548989534378, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.015085921250283718, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0009459450608119369, "signal/frontier_coverage_20/centered_abs_mean": 0.08922545164823532, "signal/frontier_coverage_20/group_std_mean": 0.11132535338401794, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.020322853699326515, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0012759239645674825, "signal/frontier_coverage_25/centered_abs_mean": 0.1211626797914505, "signal/frontier_coverage_25/group_std_mean": 0.152143993973732, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.027561284601688385, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0017326262313872576, "signal/frontier_coverage_5/centered_abs_mean": 0.16293131411075593, "signal/frontier_coverage_5/group_std_mean": 0.21380787193775178, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0370516188442707, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002329917624592781, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31487070918083193, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3838111996650696, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5012700438499451, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03148707151412964, "step": 815 }, { "calibration/aurc": 0.1261956584549119, "calibration/batch_distribution_entropy": 0.9483172114138678, "calibration/buffer_distribution_entropy": 0.977788883764146, "calibration/confidence_entropy": 0.4780430829054336, "calibration/coverage@0%": 0.20325120097585714, "calibration/coverage@1%": 0.21160629235966916, "calibration/coverage@10%": 0.567608430473606, "calibration/coverage@15%": 0.6381378578085702, "calibration/coverage@20%": 0.6888456583266518, "calibration/coverage@25%": 0.7916421888598781, "calibration/coverage@30%": 0.8010416666666667, "calibration/coverage@5%": 0.48046569295107683, "calibration/ece": 0.19220841936618543, "calibration/mean_confidence": 0.602271614621774, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0057291666666666515, "completions/max_length": 3918.0, "completions/max_terminated_length": 3918.0, "completions/mean_length": 1418.9080810546875, "completions/mean_terminated_length": 1427.1089599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 452.2, "epoch": 1.969587880151498, "grad_norm": 0.0029568555764853954, "learning_rate": 1.3221153846153848e-06, "loss": -0.019, "num_tokens": 2296059358.0, "reward": 1.01281396150589, "reward_std": 0.10452383458614349, "rewards/accuracy_reward": 0.7182291626930237, "rewards/brier_reward": 0.8234552621841431, "rewards/confidence_uniqueness_reward": 0.9444832563400268, "rewards/format_reward": 0.9942708373069763, "rewards/frontier_coverage_0": 0.025464657321572305, "rewards/frontier_coverage_1": 0.025464657321572305, "rewards/frontier_coverage_10": 0.05050650909543038, "rewards/frontier_coverage_15": 0.10313679426908492, "rewards/frontier_coverage_20": 0.17374457716941832, "rewards/frontier_coverage_25": 0.2584977805614471, "rewards/frontier_coverage_5": 0.025534218549728392, "rewards/frontier_entropy_batch_reward": -0.29701498746871946, "signal/accuracy_reward/centered_abs_mean": 0.11195746660232545, "signal/accuracy_reward/group_std_mean": 0.1531495451927185, "signal/accuracy_reward/group_zero_std_frac": 0.5388888895511628, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9098389506340027, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05597873330116272, "signal/advantage_abs_mean": 0.7542153596878052, "signal/advantage_pre_scale_abs_mean": 0.07647128999233246, "signal/advantage_pre_scale_std": 0.12971136420965196, "signal/advantage_std": 0.982850193977356, "signal/brier_reward/centered_abs_mean": 0.10961567163467408, "signal/brier_reward/group_std_mean": 0.1442680150270462, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.178808531165123, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010961567610502243, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022698301821947098, "signal/confidence_uniqueness_reward/group_std_mean": 0.037622253969311716, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03659343495965004, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022698301589116456, "signal/format_reward/centered_abs_mean": 0.010427517350763083, "signal/format_reward/group_std_mean": 0.022845717146992683, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08172076642513275, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005213758675381541, "signal/frontier_coverage_0/centered_abs_mean": 0.15467220842838286, "signal/frontier_coverage_0/group_std_mean": 0.20240817666053773, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036262784898281095, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022118125576525926, "signal/frontier_coverage_1/centered_abs_mean": 0.15467220842838286, "signal/frontier_coverage_1/group_std_mean": 0.20240817666053773, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036262784898281095, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022118125576525926, "signal/frontier_coverage_10/centered_abs_mean": 0.06070537865161896, "signal/frontier_coverage_10/group_std_mean": 0.07693096548318863, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014242619462311267, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008680869126692414, "signal/frontier_coverage_15/centered_abs_mean": 0.07251727730035781, "signal/frontier_coverage_15/group_std_mean": 0.08975716978311539, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01698379050940275, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010369970346800982, "signal/frontier_coverage_20/centered_abs_mean": 0.09770961105823517, "signal/frontier_coverage_20/group_std_mean": 0.12141573280096055, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.022855057194828988, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0013972474029287697, "signal/frontier_coverage_25/centered_abs_mean": 0.13089303821325302, "signal/frontier_coverage_25/group_std_mean": 0.1633365660905838, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0305940430611372, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0018717704573646189, "signal/frontier_coverage_5/centered_abs_mean": 0.15423052310943602, "signal/frontier_coverage_5/group_std_mean": 0.20185908377170564, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03615873046219349, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002205496421083808, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33060168027877807, "signal/frontier_entropy_batch_reward/group_std_mean": 0.396547919511795, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5411663591861725, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03306016884744167, "step": 820 }, { "calibration/aurc": 0.12442483011569842, "calibration/batch_distribution_entropy": 0.9488465592185378, "calibration/buffer_distribution_entropy": 0.9773383751335333, "calibration/confidence_entropy": 0.4841768576504233, "calibration/coverage@0%": 0.1472027306353351, "calibration/coverage@1%": 0.15190246953872935, "calibration/coverage@10%": 0.45165361183637937, "calibration/coverage@15%": 0.7130344321148825, "calibration/coverage@20%": 0.8033126631853786, "calibration/coverage@25%": 0.8612108355091384, "calibration/coverage@30%": 0.895097639251523, "calibration/coverage@5%": 0.4119859660574412, "calibration/ece": 0.2017781809453873, "calibration/mean_confidence": 0.5740669150620106, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003211805555555536, "completions/max_length": 3631.6, "completions/max_terminated_length": 3631.6, "completions/mean_length": 1321.271875, "completions/mean_terminated_length": 1325.5808349609374, "completions/min_length": 0.0, "completions/min_terminated_length": 385.4, "epoch": 1.981587730153373, "grad_norm": 0.0029349022079259157, "learning_rate": 1.292067307692308e-06, "loss": -0.0096, "num_tokens": 2314368794.0, "reward": 1.0168671369552613, "reward_std": 0.10342361778020859, "rewards/accuracy_reward": 0.7309895634651185, "rewards/brier_reward": 0.8359049558639526, "rewards/confidence_uniqueness_reward": 0.9436007738113403, "rewards/format_reward": 0.9967881798744201, "rewards/frontier_coverage_0": 0.023777881916612387, "rewards/frontier_coverage_1": 0.023777881916612387, "rewards/frontier_coverage_10": 0.052507009357213974, "rewards/frontier_coverage_15": 0.10996098518371582, "rewards/frontier_coverage_20": 0.18512236475944518, "rewards/frontier_coverage_25": 0.27436395883560183, "rewards/frontier_coverage_5": 0.023881060443818568, "rewards/frontier_entropy_batch_reward": -0.3488785922527313, "signal/accuracy_reward/centered_abs_mean": 0.11349283754825593, "signal/accuracy_reward/group_std_mean": 0.15641495883464812, "signal/accuracy_reward/group_zero_std_frac": 0.5333333313465118, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9231669783592225, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05674641877412796, "signal/advantage_abs_mean": 0.7529475927352905, "signal/advantage_pre_scale_abs_mean": 0.07502718269824982, "signal/advantage_pre_scale_std": 0.12588909417390823, "signal/advantage_std": 0.9828490853309632, "signal/brier_reward/centered_abs_mean": 0.10705066174268722, "signal/brier_reward/group_std_mean": 0.1404803767800331, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17473995983600615, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010705066099762916, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020141271874308586, "signal/confidence_uniqueness_reward/group_std_mean": 0.03240118809044361, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03299994915723801, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020141272805631162, "signal/format_reward/centered_abs_mean": 0.006114366184920073, "signal/format_reward/group_std_mean": 0.015425614640116691, "signal/format_reward/group_zero_std_frac": 0.9222222447395325, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04925214573740959, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0030571830924600364, "signal/frontier_coverage_0/centered_abs_mean": 0.15044747292995453, "signal/frontier_coverage_0/group_std_mean": 0.19402441680431365, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035044976323843, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021513988031074405, "signal/frontier_coverage_1/centered_abs_mean": 0.15044747292995453, "signal/frontier_coverage_1/group_std_mean": 0.19402441680431365, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035044976323843, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021513988031074405, "signal/frontier_coverage_10/centered_abs_mean": 0.05916027277708054, "signal/frontier_coverage_10/group_std_mean": 0.07418683767318726, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013846796937286854, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008459919597953558, "signal/frontier_coverage_15/centered_abs_mean": 0.0763422504067421, "signal/frontier_coverage_15/group_std_mean": 0.09492785483598709, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017957745492458342, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010916941799223423, "signal/frontier_coverage_20/centered_abs_mean": 0.10528118759393693, "signal/frontier_coverage_20/group_std_mean": 0.13137973099946976, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02475803196430206, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015055209165439009, "signal/frontier_coverage_25/centered_abs_mean": 0.1406207025051117, "signal/frontier_coverage_25/group_std_mean": 0.1765914499759674, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03303173556923866, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002010876010172069, "signal/frontier_coverage_5/centered_abs_mean": 0.14978999197483062, "signal/frontier_coverage_5/group_std_mean": 0.1932007133960724, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03489072918891907, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002141996775753796, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3424068748950958, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4095862090587616, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.562820303440094, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034240689128637314, "step": 825 }, { "calibration/aurc": 0.09355573925524277, "calibration/batch_distribution_entropy": 0.9612319655245531, "calibration/buffer_distribution_entropy": 0.9780537442600634, "calibration/confidence_entropy": 0.4896003885731581, "calibration/coverage@0%": 0.04173466057441253, "calibration/coverage@1%": 0.04173466057441253, "calibration/coverage@10%": 0.6361591601392516, "calibration/coverage@15%": 0.790623640121845, "calibration/coverage@20%": 0.9227154046997388, "calibration/coverage@25%": 0.9744125326370756, "calibration/coverage@30%": 0.993733681462141, "calibration/coverage@5%": 0.37932985204525677, "calibration/ece": 0.2052610825050439, "calibration/mean_confidence": 0.5847677710929464, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00269097222222221, "completions/max_length": 3762.2, "completions/max_terminated_length": 3762.2, "completions/mean_length": 1352.4456787109375, "completions/mean_terminated_length": 1356.0873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 341.0, "epoch": 1.993587580155248, "grad_norm": 0.0030781906098127365, "learning_rate": 1.2620192307692309e-06, "loss": -0.0042, "num_tokens": 2333073032.0, "reward": 1.0112749218940735, "reward_std": 0.10696900635957718, "rewards/accuracy_reward": 0.7067708253860474, "rewards/brier_reward": 0.8296258449554443, "rewards/confidence_uniqueness_reward": 0.9480892419815063, "rewards/format_reward": 0.9972222208976745, "rewards/frontier_coverage_0": 0.0333960821852088, "rewards/frontier_coverage_1": 0.0333960821852088, "rewards/frontier_coverage_10": 0.0535750538110733, "rewards/frontier_coverage_15": 0.1058346152305603, "rewards/frontier_coverage_20": 0.17506144642829896, "rewards/frontier_coverage_25": 0.25745048820972444, "rewards/frontier_coverage_5": 0.03353348933160305, "rewards/frontier_entropy_batch_reward": -0.28392277359962464, "signal/accuracy_reward/centered_abs_mean": 0.1243598073720932, "signal/accuracy_reward/group_std_mean": 0.16650678515434264, "signal/accuracy_reward/group_zero_std_frac": 0.5083333492279053, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9852775573730469, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0621799036860466, "signal/advantage_abs_mean": 0.7518176913261414, "signal/advantage_pre_scale_abs_mean": 0.08001424670219422, "signal/advantage_pre_scale_std": 0.13119888603687285, "signal/advantage_std": 0.9828913331031799, "signal/brier_reward/centered_abs_mean": 0.10915734171867371, "signal/brier_reward/group_std_mean": 0.14323717653751372, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17464982271194457, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010915734060108661, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018232964724302293, "signal/confidence_uniqueness_reward/group_std_mean": 0.029694054275751114, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02886694110929966, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018232964677736164, "signal/format_reward/centered_abs_mean": 0.00530598945915699, "signal/format_reward/group_std_mean": 0.013867205008864403, "signal/format_reward/group_zero_std_frac": 0.9277778029441833, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.041387615352869035, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002652994729578495, "signal/frontier_coverage_0/centered_abs_mean": 0.15533825755119324, "signal/frontier_coverage_0/group_std_mean": 0.20087738037109376, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03541974872350693, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022213370073586704, "signal/frontier_coverage_1/centered_abs_mean": 0.15533825755119324, "signal/frontier_coverage_1/group_std_mean": 0.20087738037109376, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03541974872350693, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022213370073586704, "signal/frontier_coverage_10/centered_abs_mean": 0.06064486652612686, "signal/frontier_coverage_10/group_std_mean": 0.07602840662002563, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01384783312678337, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008672215277329087, "signal/frontier_coverage_15/centered_abs_mean": 0.07716170549392701, "signal/frontier_coverage_15/group_std_mean": 0.09574876427650451, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01760086081922054, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011034123599529266, "signal/frontier_coverage_20/centered_abs_mean": 0.10649595856666565, "signal/frontier_coverage_20/group_std_mean": 0.133084973692894, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.024280770123004912, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015228921081870794, "signal/frontier_coverage_25/centered_abs_mean": 0.14340307414531708, "signal/frontier_coverage_25/group_std_mean": 0.18047136664390565, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.032692290097475055, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002050663949921727, "signal/frontier_coverage_5/centered_abs_mean": 0.15442144870758057, "signal/frontier_coverage_5/group_std_mean": 0.19973163902759553, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03521168828010559, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022082267329096793, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32058742046356203, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3872204661369324, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5107461273670196, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032058742642402646, "step": 830 }, { "calibration/aurc": 0.10569611704478651, "calibration/batch_distribution_entropy": 0.9533977736379946, "calibration/buffer_distribution_entropy": 0.978567734903438, "calibration/confidence_entropy": 0.47113983265831044, "calibration/coverage@0%": 0.12089054077299177, "calibration/coverage@1%": 0.14536970743965844, "calibration/coverage@10%": 0.5777594846873447, "calibration/coverage@15%": 0.7742097968754984, "calibration/coverage@20%": 0.868240380321609, "calibration/coverage@25%": 0.9299003458504778, "calibration/coverage@30%": 0.9843095976925074, "calibration/coverage@5%": 0.36547769173285216, "calibration/ece": 0.16692479621857995, "calibration/mean_confidence": 0.6082626008419189, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0028645833333333483, "completions/max_length": 3270.6, "completions/max_terminated_length": 3270.6, "completions/mean_length": 1294.222314453125, "completions/mean_terminated_length": 1298.0444580078124, "completions/min_length": 136.8, "completions/min_terminated_length": 464.8, "epoch": 2.007199910001125, "grad_norm": 0.002975533716380596, "learning_rate": 1.231971153846154e-06, "loss": -0.0119, "num_tokens": 2351306124.0, "reward": 1.0097980260849, "reward_std": 0.10146247148513794, "rewards/accuracy_reward": 0.7085069417953491, "rewards/brier_reward": 0.823908519744873, "rewards/confidence_uniqueness_reward": 0.9468066334724426, "rewards/format_reward": 0.9962673664093018, "rewards/frontier_coverage_0": 0.02436054665595293, "rewards/frontier_coverage_1": 0.02436054665595293, "rewards/frontier_coverage_10": 0.049334879219532016, "rewards/frontier_coverage_15": 0.10209986120462418, "rewards/frontier_coverage_20": 0.17016193866729737, "rewards/frontier_coverage_25": 0.25037443935871123, "rewards/frontier_coverage_5": 0.02455103537067771, "rewards/frontier_entropy_batch_reward": -0.288876348733902, "signal/accuracy_reward/centered_abs_mean": 0.10684678852558135, "signal/accuracy_reward/group_std_mean": 0.14497185051441192, "signal/accuracy_reward/group_zero_std_frac": 0.5666666686534881, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8739351272583008, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05342339426279068, "signal/advantage_abs_mean": 0.7597023010253906, "signal/advantage_pre_scale_abs_mean": 0.07467978298664094, "signal/advantage_pre_scale_std": 0.12498285323381424, "signal/advantage_std": 0.9828245639801025, "signal/brier_reward/centered_abs_mean": 0.11109703779220581, "signal/brier_reward/group_std_mean": 0.14594402611255647, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18416101932525636, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011109703965485097, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019707629829645155, "signal/confidence_uniqueness_reward/group_std_mean": 0.03326699696481228, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03271297216415405, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001970763085409999, "signal/format_reward/centered_abs_mean": 0.007090928731486201, "signal/format_reward/group_std_mean": 0.01796768419444561, "signal/format_reward/group_zero_std_frac": 0.9083333492279053, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.057955706119537355, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0035454643657431006, "signal/frontier_coverage_0/centered_abs_mean": 0.14938410818576814, "signal/frontier_coverage_0/group_std_mean": 0.19320706129074097, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035366549342870715, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002136192889884114, "signal/frontier_coverage_1/centered_abs_mean": 0.14938410818576814, "signal/frontier_coverage_1/group_std_mean": 0.19320706129074097, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035366549342870715, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002136192889884114, "signal/frontier_coverage_10/centered_abs_mean": 0.059661376476287845, "signal/frontier_coverage_10/group_std_mean": 0.07549520283937454, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014192548766732215, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008531576604582369, "signal/frontier_coverage_15/centered_abs_mean": 0.07816312313079835, "signal/frontier_coverage_15/group_std_mean": 0.09718612283468246, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018611904233694077, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011177326552569867, "signal/frontier_coverage_20/centered_abs_mean": 0.10804884135723114, "signal/frontier_coverage_20/group_std_mean": 0.1343923181295395, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025684969499707222, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015450984239578248, "signal/frontier_coverage_25/centered_abs_mean": 0.14375920593738556, "signal/frontier_coverage_25/group_std_mean": 0.17935467660427093, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03412468209862709, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002055756654590368, "signal/frontier_coverage_5/centered_abs_mean": 0.14854101240634918, "signal/frontier_coverage_5/group_std_mean": 0.19215008020401, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.035167403519153595, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021241364534944295, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3249393939971924, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39019683599472044, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5427081823348999, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03249393925070763, "step": 835 }, { "calibration/aurc": 0.062211177501246516, "calibration/batch_distribution_entropy": 0.9600943074730827, "calibration/buffer_distribution_entropy": 0.977535601973992, "calibration/confidence_entropy": 0.47148411925605904, "calibration/coverage@0%": 0.14299342105263158, "calibration/coverage@1%": 0.4756743421052632, "calibration/coverage@10%": 0.7766829705152934, "calibration/coverage@15%": 0.8547742433636447, "calibration/coverage@20%": 0.9250560588316341, "calibration/coverage@25%": 0.9696335078534031, "calibration/coverage@30%": 0.9837696335078533, "calibration/coverage@5%": 0.6397520552034536, "calibration/ece": 0.2028160098956898, "calibration/mean_confidence": 0.5834283529912395, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00616319444444442, "completions/max_length": 3825.8, "completions/max_terminated_length": 3825.8, "completions/mean_length": 1376.5011474609375, "completions/mean_terminated_length": 1385.176806640625, "completions/min_length": 0.0, "completions/min_terminated_length": 430.6, "epoch": 2.019199760003, "grad_norm": 0.002842454006895423, "learning_rate": 1.201923076923077e-06, "loss": -0.0196, "num_tokens": 2370261017.0, "reward": 1.0190519213676452, "reward_std": 0.11003706753253936, "rewards/accuracy_reward": 0.7313368082046509, "rewards/brier_reward": 0.8243500471115113, "rewards/confidence_uniqueness_reward": 0.9446738362312317, "rewards/format_reward": 0.9938368082046509, "rewards/frontier_coverage_0": 0.01120219323784113, "rewards/frontier_coverage_1": 0.01120219323784113, "rewards/frontier_coverage_10": 0.04924294427037239, "rewards/frontier_coverage_15": 0.10741689503192901, "rewards/frontier_coverage_20": 0.18002953827381135, "rewards/frontier_coverage_25": 0.2659532427787781, "rewards/frontier_coverage_5": 0.011461955774575473, "rewards/frontier_entropy_batch_reward": -0.29539363384246825, "signal/accuracy_reward/centered_abs_mean": 0.114794921875, "signal/accuracy_reward/group_std_mean": 0.15858063697814942, "signal/accuracy_reward/group_zero_std_frac": 0.5194444596767426, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9148180603981018, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0573974609375, "signal/advantage_abs_mean": 0.7386746406555176, "signal/advantage_pre_scale_abs_mean": 0.07803614884614944, "signal/advantage_pre_scale_std": 0.13629978895187378, "signal/advantage_std": 0.9828796863555909, "signal/brier_reward/centered_abs_mean": 0.10765846222639083, "signal/brier_reward/group_std_mean": 0.1435864210128784, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17238323390483856, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010765845887362957, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0232498437166214, "signal/confidence_uniqueness_reward/group_std_mean": 0.041168955340981486, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03694990836083889, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002324984478764236, "signal/format_reward/centered_abs_mean": 0.011539713572710752, "signal/format_reward/group_std_mean": 0.027077178843319415, "signal/format_reward/group_zero_std_frac": 0.869444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09020622819662094, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005769856786355376, "signal/frontier_coverage_0/centered_abs_mean": 0.1507784366607666, "signal/frontier_coverage_0/group_std_mean": 0.1961473524570465, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03440270908176899, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002156131574884057, "signal/frontier_coverage_1/centered_abs_mean": 0.1507784366607666, "signal/frontier_coverage_1/group_std_mean": 0.1961473524570465, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03440270908176899, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002156131574884057, "signal/frontier_coverage_10/centered_abs_mean": 0.05933835953474045, "signal/frontier_coverage_10/group_std_mean": 0.07431373596191407, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01358701903373003, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008485385100357234, "signal/frontier_coverage_15/centered_abs_mean": 0.07423074394464493, "signal/frontier_coverage_15/group_std_mean": 0.09245937913656235, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017081401497125625, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010614996077492832, "signal/frontier_coverage_20/centered_abs_mean": 0.10113731771707535, "signal/frontier_coverage_20/group_std_mean": 0.12667881697416306, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.023303528502583503, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014462636318057775, "signal/frontier_coverage_25/centered_abs_mean": 0.13548611998558044, "signal/frontier_coverage_25/group_std_mean": 0.1705509215593338, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.031227792799472808, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019374514231458306, "signal/frontier_coverage_5/centered_abs_mean": 0.15002332031726837, "signal/frontier_coverage_5/group_std_mean": 0.19519493579864503, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034229816496372224, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002145333564840257, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33091223835945127, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39722990393638613, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5324559807777405, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03309122622013092, "step": 840 }, { "calibration/aurc": 0.06684569611947105, "calibration/batch_distribution_entropy": 0.9320840327223838, "calibration/buffer_distribution_entropy": 0.9765756609311985, "calibration/confidence_entropy": 0.519962933260064, "calibration/coverage@0%": 0.3502802096278069, "calibration/coverage@1%": 0.35970463864047325, "calibration/coverage@10%": 0.725888842557783, "calibration/coverage@15%": 0.8328692453679034, "calibration/coverage@20%": 0.9629242819843341, "calibration/coverage@25%": 0.9900783289817232, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5334907507356006, "calibration/ece": 0.21881182634444443, "calibration/mean_confidence": 0.6166823207944025, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006944444444444464, "completions/max_length": 3702.8, "completions/max_terminated_length": 3702.8, "completions/mean_length": 1365.2091064453125, "completions/mean_terminated_length": 1374.738330078125, "completions/min_length": 0.0, "completions/min_terminated_length": 376.0, "epoch": 2.031199610004875, "grad_norm": 0.0029446668922901154, "learning_rate": 1.1718750000000001e-06, "loss": -0.0191, "num_tokens": 2389087778.0, "reward": 1.0194933891296387, "reward_std": 0.10787245631217957, "rewards/accuracy_reward": 0.7318576455116272, "rewards/brier_reward": 0.8298116207122803, "rewards/confidence_uniqueness_reward": 0.9431999564170838, "rewards/format_reward": 0.9930555582046509, "rewards/frontier_coverage_0": 0.01630272523034364, "rewards/frontier_coverage_1": 0.01630272523034364, "rewards/frontier_coverage_10": 0.05233140736818313, "rewards/frontier_coverage_15": 0.10671553313732147, "rewards/frontier_coverage_20": 0.17818537950515748, "rewards/frontier_coverage_25": 0.26444960534572604, "rewards/frontier_coverage_5": 0.016577059985138476, "rewards/frontier_entropy_batch_reward": -0.2957174897193909, "signal/accuracy_reward/centered_abs_mean": 0.11165906935930252, "signal/accuracy_reward/group_std_mean": 0.1545466125011444, "signal/accuracy_reward/group_zero_std_frac": 0.5277777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8903607368469239, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05582953467965126, "signal/advantage_abs_mean": 0.7452172636985779, "signal/advantage_pre_scale_abs_mean": 0.07781935185194015, "signal/advantage_pre_scale_std": 0.13558341413736344, "signal/advantage_std": 0.9828800678253173, "signal/brier_reward/centered_abs_mean": 0.10683204084634781, "signal/brier_reward/group_std_mean": 0.14029796719551085, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17178708612918853, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010683204606175422, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023753628134727478, "signal/confidence_uniqueness_reward/group_std_mean": 0.04016609191894531, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03838530480861664, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023753628134727477, "signal/format_reward/centered_abs_mean": 0.011631944589316845, "signal/format_reward/group_std_mean": 0.02555925101041794, "signal/format_reward/group_zero_std_frac": 0.8805555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09416337609291077, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005815972294658423, "signal/frontier_coverage_0/centered_abs_mean": 0.14774567186832427, "signal/frontier_coverage_0/group_std_mean": 0.19174781441688538, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0339319072663784, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021127631422132253, "signal/frontier_coverage_1/centered_abs_mean": 0.14774567186832427, "signal/frontier_coverage_1/group_std_mean": 0.19174781441688538, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0339319072663784, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021127631422132253, "signal/frontier_coverage_10/centered_abs_mean": 0.05867672711610794, "signal/frontier_coverage_10/group_std_mean": 0.07409504801034927, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013528883457183838, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008390772040002048, "signal/frontier_coverage_15/centered_abs_mean": 0.07549417465925216, "signal/frontier_coverage_15/group_std_mean": 0.09374563097953796, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.017411107011139394, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010795666836202144, "signal/frontier_coverage_20/centered_abs_mean": 0.10412525534629821, "signal/frontier_coverage_20/group_std_mean": 0.12972887754440307, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02397709749639034, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0014889911515638232, "signal/frontier_coverage_25/centered_abs_mean": 0.13956733644008637, "signal/frontier_coverage_25/group_std_mean": 0.17463013529777527, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03210580088198185, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019958128686994314, "signal/frontier_coverage_5/centered_abs_mean": 0.14707699418067932, "signal/frontier_coverage_5/group_std_mean": 0.19090475738048554, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03377884775400162, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002103201043792069, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32507652044296265, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3902816414833069, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5230625331401825, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03250765353441239, "step": 845 }, { "calibration/aurc": 0.15014207230116053, "calibration/batch_distribution_entropy": 0.9652176849919577, "calibration/buffer_distribution_entropy": 0.9773341124664008, "calibration/confidence_entropy": 0.45897563079350806, "calibration/coverage@0%": 0.13444436090568168, "calibration/coverage@1%": 0.1975359079030707, "calibration/coverage@10%": 0.35414943679685046, "calibration/coverage@15%": 0.5254633339826573, "calibration/coverage@20%": 0.734143564401551, "calibration/coverage@25%": 0.864620228038039, "calibration/coverage@30%": 0.9110765706806283, "calibration/coverage@5%": 0.23821666290742236, "calibration/ece": 0.22260068186711415, "calibration/mean_confidence": 0.5354948923959031, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006163194444444442, "completions/max_length": 3740.4, "completions/max_terminated_length": 3740.4, "completions/mean_length": 1421.7507080078126, "completions/mean_terminated_length": 1430.629345703125, "completions/min_length": 0.0, "completions/min_terminated_length": 429.6, "epoch": 2.04319946000675, "grad_norm": 0.0027419920079410076, "learning_rate": 1.141826923076923e-06, "loss": -0.015, "num_tokens": 2408581834.0, "reward": 1.0171156525611877, "reward_std": 0.12138309180736542, "rewards/accuracy_reward": 0.7256944417953491, "rewards/brier_reward": 0.817634391784668, "rewards/confidence_uniqueness_reward": 0.9451643347740173, "rewards/format_reward": 0.9938368082046509, "rewards/frontier_coverage_0": 0.008699403330683707, "rewards/frontier_coverage_1": 0.008699403330683707, "rewards/frontier_coverage_10": 0.049815284460783, "rewards/frontier_coverage_15": 0.10400655418634415, "rewards/frontier_coverage_20": 0.17297520637512206, "rewards/frontier_coverage_25": 0.2560495167970657, "rewards/frontier_coverage_5": 0.008948711678385735, "rewards/frontier_entropy_batch_reward": -0.2764132499694824, "signal/accuracy_reward/centered_abs_mean": 0.1402560740709305, "signal/accuracy_reward/group_std_mean": 0.18668433427810668, "signal/accuracy_reward/group_zero_std_frac": 0.4666666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0340290307998656, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07012803703546525, "signal/advantage_abs_mean": 0.7511513710021973, "signal/advantage_pre_scale_abs_mean": 0.08810898214578629, "signal/advantage_pre_scale_std": 0.14673225283622743, "signal/advantage_std": 0.9830058932304382, "signal/brier_reward/centered_abs_mean": 0.1129622220993042, "signal/brier_reward/group_std_mean": 0.1479853630065918, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16706807613372804, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011296222917735577, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023049880191683768, "signal/confidence_uniqueness_reward/group_std_mean": 0.0411870576441288, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03394843973219395, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023049880284816025, "signal/format_reward/centered_abs_mean": 0.011420355923473836, "signal/format_reward/group_std_mean": 0.027312709763646126, "signal/format_reward/group_zero_std_frac": 0.8638888835906983, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08352030664682389, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005710177961736918, "signal/frontier_coverage_0/centered_abs_mean": 0.1656607449054718, "signal/frontier_coverage_0/group_std_mean": 0.2115688681602478, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035052116960287094, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023689485620707273, "signal/frontier_coverage_1/centered_abs_mean": 0.1656607449054718, "signal/frontier_coverage_1/group_std_mean": 0.2115688681602478, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035052116960287094, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023689485620707273, "signal/frontier_coverage_10/centered_abs_mean": 0.06097555160522461, "signal/frontier_coverage_10/group_std_mean": 0.07610448449850082, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.012921417132019997, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008719503879547119, "signal/frontier_coverage_15/centered_abs_mean": 0.07464765012264252, "signal/frontier_coverage_15/group_std_mean": 0.09246674776077271, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01581343188881874, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0010674613760784269, "signal/frontier_coverage_20/centered_abs_mean": 0.10256357938051223, "signal/frontier_coverage_20/group_std_mean": 0.12776158303022384, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02171347513794899, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001466659177094698, "signal/frontier_coverage_25/centered_abs_mean": 0.1391077905893326, "signal/frontier_coverage_25/group_std_mean": 0.17436096370220183, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.029439039155840875, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0019892414566129446, "signal/frontier_coverage_5/centered_abs_mean": 0.1648542582988739, "signal/frontier_coverage_5/group_std_mean": 0.21056585609912873, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.034882017970085145, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002357415994629264, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31900513768196104, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3865148961544037, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47265112996101377, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031900516524910924, "step": 850 }, { "epoch": 2.04319946000675, "eval_calibration/aurc": 0.10331722642995035, "eval_calibration/batch_distribution_entropy": 0.8994859346965384, "eval_calibration/buffer_distribution_entropy": 0.9778323392141767, "eval_calibration/confidence_entropy": 0.48347415095907587, "eval_calibration/coverage@0%": 0.33602729885057475, "eval_calibration/coverage@1%": 0.33602729885057475, "eval_calibration/coverage@10%": 0.5599856321839081, "eval_calibration/coverage@15%": 0.7857399425287356, "eval_calibration/coverage@20%": 0.8602729885057471, "eval_calibration/coverage@25%": 0.9301364942528735, "eval_calibration/coverage@30%": 0.9780890804597702, "eval_calibration/coverage@5%": 0.3776939655172414, "eval_calibration/ece": 0.2081085308908046, "eval_calibration/mean_confidence": 0.6086531788793104, "eval_completions/clipped_ratio": 0.00434027777777779, "eval_completions/max_length": 3287.5, "eval_completions/max_terminated_length": 3287.5, "eval_completions/mean_length": 1345.1163330078125, "eval_completions/mean_terminated_length": 1350.9969482421875, "eval_completions/min_length": 255.16666666666666, "eval_completions/min_terminated_length": 472.5, "eval_loss": 0.0, "eval_num_tokens": 2408581834.0, "eval_reward": 0.9270426034927368, "eval_reward_std": 0.23688022047281265, "eval_rewards/accuracy_reward": 0.6944444477558136, "eval_rewards/brier_reward": 0.8301023046175638, "eval_rewards/confidence_uniqueness_reward": 0.8897854586442312, "eval_rewards/format_reward": 0.9947916766007742, "eval_rewards/frontier_coverage_0": 0.03768664660553137, "eval_rewards/frontier_coverage_1": 0.03768664660553137, "eval_rewards/frontier_coverage_10": 0.052901595210035644, "eval_rewards/frontier_coverage_15": 0.1036976898709933, "eval_rewards/frontier_coverage_20": 0.17143141478300095, "eval_rewards/frontier_coverage_25": 0.2521692191561063, "eval_rewards/frontier_coverage_5": 0.037777805080016456, "eval_rewards/frontier_entropy_batch_reward": -0.9947916766007742, "eval_runtime": 212.5872, "eval_samples_per_second": 4.704, "eval_signal/accuracy_reward/centered_abs_mean": 0.4058159738779068, "eval_signal/accuracy_reward/group_std_mean": 0.454753835995992, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8737364013989767, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2029079869389534, "eval_signal/advantage_abs_mean": 0.8658012747764587, "eval_signal/advantage_pre_scale_abs_mean": 0.20703220119078955, "eval_signal/advantage_pre_scale_std": 0.23602647334337234, "eval_signal/advantage_std": 0.9863978525002798, "eval_signal/brier_reward/centered_abs_mean": 0.163881945113341, "eval_signal/brier_reward/group_std_mean": 0.22143561144669852, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07060187309980392, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.01638819541161259, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04664057493209839, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06534135589996974, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020085140131413937, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00466405728366226, "eval_signal/format_reward/centered_abs_mean": 0.009765624844779571, "eval_signal/format_reward/group_std_mean": 0.022957629524171352, "eval_signal/format_reward/group_zero_std_frac": 0.8888889153798422, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.020531928166747093, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.004882812422389786, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.24889005223910013, "eval_signal/frontier_coverage_0/group_std_mean": 0.34743093450864154, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015377589967101812, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003559127605209748, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.24889005223910013, "eval_signal/frontier_coverage_1/group_std_mean": 0.34743093450864154, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015377589967101812, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003559127605209748, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.07524702822168668, "eval_signal/frontier_coverage_10/group_std_mean": 0.0998810629049937, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004650625012194117, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001076032465789467, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.12017117316524188, "eval_signal/frontier_coverage_15/group_std_mean": 0.1525182550152143, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007422773788372676, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017184477183036506, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.20055429637432098, "eval_signal/frontier_coverage_20/group_std_mean": 0.24715026964743933, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012374301285793384, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0028679263778030872, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.294106458624204, "eval_signal/frontier_coverage_25/group_std_mean": 0.3577578862508138, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.018138304352760315, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004205722206582625, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.24757558852434158, "eval_signal/frontier_coverage_5/group_std_mean": 0.3458026399215062, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01529612842326363, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00354033091571182, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.009765624844779571, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.022957629524171352, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8888889153798422, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004106385710959633, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0009765625, "eval_steps_per_second": 0.028, "step": 850 }, { "epoch": 2.04319946000675, "step": 850, "train_probe_calibration/aurc": 0.12330988667450855, "train_probe_calibration/batch_distribution_entropy": 0.903959760016461, "train_probe_calibration/buffer_distribution_entropy": 0.9778106939337934, "train_probe_calibration/confidence_entropy": 0.4707082194129657, "train_probe_calibration/coverage@0%": 0.28461021505376344, "train_probe_calibration/coverage@1%": 0.28461021505376344, "train_probe_calibration/coverage@10%": 0.5561155913978495, "train_probe_calibration/coverage@15%": 0.6920362903225806, "train_probe_calibration/coverage@20%": 0.9163306451612904, "train_probe_calibration/coverage@25%": 0.9791666666666666, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.3111559139784946, "train_probe_calibration/ece": 0.2055883400537634, "train_probe_calibration/mean_confidence": 0.6214233534946236, "train_probe_completions/clipped_ratio": 0.00434027777777779, "train_probe_completions/max_length": 3570.5, "train_probe_completions/max_terminated_length": 3570.5, "train_probe_completions/mean_length": 1370.541015625, "train_probe_completions/mean_terminated_length": 1376.5172932942708, "train_probe_completions/min_length": 171.33333333333334, "train_probe_completions/min_terminated_length": 486.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 2408581834.0, "train_probe_reward": 0.9569332003593445, "train_probe_reward_std": 0.2246442437171936, "train_probe_rewards/accuracy_reward": 0.7491319477558136, "train_probe_rewards/brier_reward": 0.84639111161232, "train_probe_rewards/confidence_uniqueness_reward": 0.8930891950925192, "train_probe_rewards/format_reward": 0.9947916766007742, "train_probe_rewards/frontier_coverage_0": 0.02118841770182674, "train_probe_rewards/frontier_coverage_1": 0.02118841770182674, "train_probe_rewards/frontier_coverage_10": 0.05621129460632801, "train_probe_rewards/frontier_coverage_15": 0.11968943352500598, "train_probe_rewards/frontier_coverage_20": 0.1998051976164182, "train_probe_rewards/frontier_coverage_25": 0.2949647903442383, "train_probe_rewards/frontier_coverage_5": 0.02139244688441977, "train_probe_rewards/frontier_entropy_batch_reward": -0.9947916766007742, "train_probe_runtime": 211.7592, "train_probe_samples_per_second": 4.722, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3678927918275197, "train_probe_signal/accuracy_reward/group_std_mean": 0.43377458055814105, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8337254126866659, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.18394639591375986, "train_probe_signal/advantage_abs_mean": 0.8216431637605032, "train_probe_signal/advantage_pre_scale_abs_mean": 0.18601106852293015, "train_probe_signal/advantage_pre_scale_std": 0.22373904784520468, "train_probe_signal/advantage_std": 0.9863761961460114, "train_probe_signal/brier_reward/centered_abs_mean": 0.14684191594521204, "train_probe_signal/brier_reward/group_std_mean": 0.20539081345001856, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06664901288847129, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.014684191749741634, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04518438751498858, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.06575708587964375, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020526379346847534, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0045184389455243945, "train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/format_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/format_reward/group_zero_std_frac": 0.8333333631356558, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022930872005720932, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.23663152754306793, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.34504841764767963, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015345245134085417, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033838309658070407, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.23663152754306793, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.34504841764767963, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015345245134085417, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0033838309658070407, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.07336462040742238, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.0970181276400884, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.004758586253349979, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00104911407106556, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1168044979373614, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.14624296625455221, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.007575858850032091, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001670304317182551, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.19161372631788254, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.2350246881445249, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.012425205515076717, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0027400763938203454, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.2768913333614667, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.33837322890758514, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01795490738004446, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.003959546098485589, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.23545273641745249, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.34352175891399384, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01526872410128514, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033669740660116076, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333631356558, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004586174463232358, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0010091145910943549, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.19908929956399884, "calibration/batch_distribution_entropy": 0.921269447034111, "calibration/buffer_distribution_entropy": 0.9780586621449909, "calibration/confidence_entropy": 0.4840395335947276, "calibration/coverage@0%": 0.005749607669798455, "calibration/coverage@1%": 0.005749607669798455, "calibration/coverage@10%": 0.34102231999068, "calibration/coverage@15%": 0.6641502710333531, "calibration/coverage@20%": 0.7498762866868071, "calibration/coverage@25%": 0.7665864694544311, "calibration/coverage@30%": 0.802620535042992, "calibration/coverage@5%": 0.11384360244786633, "calibration/ece": 0.16123747199751368, "calibration/mean_confidence": 0.6117934088268526, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0043402777777777676, "completions/max_length": 3814.6, "completions/max_terminated_length": 3814.6, "completions/mean_length": 1304.929931640625, "completions/mean_terminated_length": 1310.61953125, "completions/min_length": 0.0, "completions/min_terminated_length": 327.4, "epoch": 2.055199310008625, "grad_norm": 0.0032066632993519306, "learning_rate": 1.1117788461538462e-06, "loss": -0.0148, "num_tokens": 2426706307.0, "reward": 1.0196407675743102, "reward_std": 0.10803952366113663, "rewards/accuracy_reward": 0.7427083373069763, "rewards/brier_reward": 0.817469346523285, "rewards/confidence_uniqueness_reward": 0.9436403512954712, "rewards/format_reward": 0.9956597208976745, "rewards/frontier_coverage_0": -0.006897637248039245, "rewards/frontier_coverage_1": -0.006897637248039245, "rewards/frontier_coverage_10": 0.046662700921297075, "rewards/frontier_coverage_15": 0.10604156851768494, "rewards/frontier_coverage_20": 0.17919767796993255, "rewards/frontier_coverage_25": 0.26617750227451326, "rewards/frontier_coverage_5": -0.006526473723351955, "rewards/frontier_entropy_batch_reward": -0.3391614556312561, "signal/accuracy_reward/centered_abs_mean": 0.11490885317325591, "signal/accuracy_reward/group_std_mean": 0.1600183442234993, "signal/accuracy_reward/group_zero_std_frac": 0.5111111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.880127203464508, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05745442658662796, "signal/advantage_abs_mean": 0.7468799233436585, "signal/advantage_pre_scale_abs_mean": 0.07798066586256028, "signal/advantage_pre_scale_std": 0.13145326524972917, "signal/advantage_std": 0.9829369902610778, "signal/brier_reward/centered_abs_mean": 0.11005659252405167, "signal/brier_reward/group_std_mean": 0.14418595135211945, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1698257029056549, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01100565940141678, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021314630657434462, "signal/confidence_uniqueness_reward/group_std_mean": 0.035215172171592715, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03296075724065304, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021314630983397366, "signal/format_reward/centered_abs_mean": 0.00807291679084301, "signal/format_reward/group_std_mean": 0.01913252491503954, "signal/format_reward/group_zero_std_frac": 0.9055555701255799, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06147683933377266, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004036458395421505, "signal/frontier_coverage_0/centered_abs_mean": 0.14297114163637162, "signal/frontier_coverage_0/group_std_mean": 0.1866467148065567, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03151693716645241, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002044487278908491, "signal/frontier_coverage_1/centered_abs_mean": 0.14297114163637162, "signal/frontier_coverage_1/group_std_mean": 0.1866467148065567, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03151693716645241, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002044487278908491, "signal/frontier_coverage_10/centered_abs_mean": 0.059609665721654895, "signal/frontier_coverage_10/group_std_mean": 0.07471666783094406, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.013211605697870254, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000852418236900121, "signal/frontier_coverage_15/centered_abs_mean": 0.08127216696739196, "signal/frontier_coverage_15/group_std_mean": 0.10074764937162399, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018062039092183114, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001162191992625594, "signal/frontier_coverage_20/centered_abs_mean": 0.11325040906667709, "signal/frontier_coverage_20/group_std_mean": 0.1412164866924286, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025165878981351853, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016194808762520553, "signal/frontier_coverage_25/centered_abs_mean": 0.15194992423057557, "signal/frontier_coverage_25/group_std_mean": 0.190808442234993, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03373695760965347, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002172883879393339, "signal/frontier_coverage_5/centered_abs_mean": 0.14223289340734482, "signal/frontier_coverage_5/group_std_mean": 0.18570739328861235, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.031353960186243056, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020339304348453878, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34247671365737914, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40856011509895324, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5324842154979705, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03424767255783081, "step": 855 }, { "calibration/aurc": 0.10892732221259172, "calibration/batch_distribution_entropy": 0.942645479055105, "calibration/buffer_distribution_entropy": 0.9767172763640286, "calibration/confidence_entropy": 0.4767007364941224, "calibration/coverage@0%": 0.042236584444139066, "calibration/coverage@1%": 0.0833824177774724, "calibration/coverage@10%": 0.471758608744446, "calibration/coverage@15%": 0.8324888776281435, "calibration/coverage@20%": 0.8920896317163667, "calibration/coverage@25%": 0.9370618329439788, "calibration/coverage@30%": 0.9763157894736842, "calibration/coverage@5%": 0.3179804377948788, "calibration/ece": 0.16581658387299025, "calibration/mean_confidence": 0.6103028231313843, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004079861111111116, "completions/max_length": 4006.8, "completions/max_terminated_length": 4006.8, "completions/mean_length": 1369.608349609375, "completions/mean_terminated_length": 1375.258837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 353.2, "epoch": 2.0671991600105, "grad_norm": 0.002877203281968832, "learning_rate": 1.0817307692307693e-06, "loss": -0.0101, "num_tokens": 2445551011.0, "reward": 1.0163455367088319, "reward_std": 0.10174518972635269, "rewards/accuracy_reward": 0.7261284708976745, "rewards/brier_reward": 0.8230874896049499, "rewards/confidence_uniqueness_reward": 0.945272159576416, "rewards/format_reward": 0.9959201574325561, "rewards/frontier_coverage_0": 0.011111350171267987, "rewards/frontier_coverage_1": 0.011111350171267987, "rewards/frontier_coverage_10": 0.048263268917798995, "rewards/frontier_coverage_15": 0.10801017582416535, "rewards/frontier_coverage_20": 0.1811635434627533, "rewards/frontier_coverage_25": 0.2692377507686615, "rewards/frontier_coverage_5": 0.01138177290558815, "rewards/frontier_entropy_batch_reward": -0.3067070960998535, "signal/accuracy_reward/centered_abs_mean": 0.10654839426279068, "signal/accuracy_reward/group_std_mean": 0.15034933537244796, "signal/accuracy_reward/group_zero_std_frac": 0.5333333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8724170207977295, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05327419713139534, "signal/advantage_abs_mean": 0.7521509408950806, "signal/advantage_pre_scale_abs_mean": 0.07447454035282135, "signal/advantage_pre_scale_std": 0.1254849314689636, "signal/advantage_std": 0.9828340649604798, "signal/brier_reward/centered_abs_mean": 0.10740028470754623, "signal/brier_reward/group_std_mean": 0.13960683047771455, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1774017930030823, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010740028135478497, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01953975111246109, "signal/confidence_uniqueness_reward/group_std_mean": 0.030278518795967102, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03224896155297756, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019539752043783664, "signal/format_reward/centered_abs_mean": 0.006157769076526165, "signal/format_reward/group_std_mean": 0.01401168517768383, "signal/format_reward/group_zero_std_frac": 0.9305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0502224363386631, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0030788845382630826, "signal/frontier_coverage_0/centered_abs_mean": 0.1405676171183586, "signal/frontier_coverage_0/group_std_mean": 0.18494743406772612, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03321279361844063, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020101168891415, "signal/frontier_coverage_1/centered_abs_mean": 0.1405676171183586, "signal/frontier_coverage_1/group_std_mean": 0.18494743406772612, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03321279361844063, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020101168891415, "signal/frontier_coverage_10/centered_abs_mean": 0.06137363091111183, "signal/frontier_coverage_10/group_std_mean": 0.07704001814126968, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014510192163288593, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008776429109275341, "signal/frontier_coverage_15/centered_abs_mean": 0.0798090323805809, "signal/frontier_coverage_15/group_std_mean": 0.09855622947216033, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018882869556546212, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001141269225627184, "signal/frontier_coverage_20/centered_abs_mean": 0.1102443590760231, "signal/frontier_coverage_20/group_std_mean": 0.1372637167572975, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02606889493763447, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0015764942625537514, "signal/frontier_coverage_25/centered_abs_mean": 0.14848661720752715, "signal/frontier_coverage_25/group_std_mean": 0.18640778362751007, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03508574143052101, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021233585197478534, "signal/frontier_coverage_5/centered_abs_mean": 0.1398726522922516, "signal/frontier_coverage_5/group_std_mean": 0.1840555638074875, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.033049411699175836, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020001789554953573, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32406482100486755, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3888309359550476, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5364414274692535, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03240648210048676, "step": 860 }, { "calibration/aurc": 0.11880237943016916, "calibration/batch_distribution_entropy": 0.9515152013772351, "calibration/buffer_distribution_entropy": 0.9756482987390385, "calibration/confidence_entropy": 0.4727274902334776, "calibration/coverage@0%": 0.12918026544821584, "calibration/coverage@1%": 0.1520969321148825, "calibration/coverage@10%": 0.5746899477806788, "calibration/coverage@15%": 0.6884165578764143, "calibration/coverage@20%": 0.7405474869451697, "calibration/coverage@25%": 0.8483953437771976, "calibration/coverage@30%": 0.9348917536988687, "calibration/coverage@5%": 0.37449276544821586, "calibration/ece": 0.1899077945890448, "calibration/mean_confidence": 0.6083666128032529, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0028645833333333483, "completions/max_length": 3851.4, "completions/max_terminated_length": 3851.4, "completions/mean_length": 1410.0517333984376, "completions/mean_terminated_length": 1414.1339599609375, "completions/min_length": 0.0, "completions/min_terminated_length": 460.8, "epoch": 2.079199010012375, "grad_norm": 0.0028341130819171667, "learning_rate": 1.0516826923076925e-06, "loss": -0.0089, "num_tokens": 2464906903.0, "reward": 1.033591103553772, "reward_std": 0.09442763477563858, "rewards/accuracy_reward": 0.7585069417953492, "rewards/brier_reward": 0.8452976226806641, "rewards/confidence_uniqueness_reward": 0.9443268656730652, "rewards/format_reward": 0.9971354365348816, "rewards/frontier_coverage_0": 0.008574995025992394, "rewards/frontier_coverage_1": 0.008574995025992394, "rewards/frontier_coverage_10": 0.05472532734274864, "rewards/frontier_coverage_15": 0.12366417050361633, "rewards/frontier_coverage_20": 0.20866862833499908, "rewards/frontier_coverage_25": 0.3106157422065735, "rewards/frontier_coverage_5": 0.008856690488755703, "rewards/frontier_entropy_batch_reward": -0.335411924123764, "signal/accuracy_reward/centered_abs_mean": 0.09484591782093048, "signal/accuracy_reward/group_std_mean": 0.1376100465655327, "signal/accuracy_reward/group_zero_std_frac": 0.5555555760860443, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8194542527198792, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04742295891046524, "signal/advantage_abs_mean": 0.7460556268692017, "signal/advantage_pre_scale_abs_mean": 0.06816202774643898, "signal/advantage_pre_scale_std": 0.11762821078300476, "signal/advantage_std": 0.9827412247657776, "signal/brier_reward/centered_abs_mean": 0.09877004623413085, "signal/brier_reward/group_std_mean": 0.13211368918418884, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17169649600982667, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009877004846930503, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018939389660954476, "signal/confidence_uniqueness_reward/group_std_mean": 0.028860129415988922, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03311100825667381, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001893938984721899, "signal/format_reward/centered_abs_mean": 0.005116102378815412, "signal/format_reward/group_std_mean": 0.011858247593045235, "signal/format_reward/group_zero_std_frac": 0.9416666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04352925010025501, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002558051189407706, "signal/frontier_coverage_0/centered_abs_mean": 0.13048950880765914, "signal/frontier_coverage_0/group_std_mean": 0.17408806085586548, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03235846050083637, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0018659999826923014, "signal/frontier_coverage_1/centered_abs_mean": 0.13048950880765914, "signal/frontier_coverage_1/group_std_mean": 0.17408806085586548, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03235846050083637, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0018659999826923014, "signal/frontier_coverage_10/centered_abs_mean": 0.0592160664498806, "signal/frontier_coverage_10/group_std_mean": 0.0748526081442833, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014810064993798732, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008467897423543036, "signal/frontier_coverage_15/centered_abs_mean": 0.08147549778223037, "signal/frontier_coverage_15/group_std_mean": 0.1004263550043106, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020447418093681335, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001165099604986608, "signal/frontier_coverage_20/centered_abs_mean": 0.11275746524333954, "signal/frontier_coverage_20/group_std_mean": 0.14012570679187775, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028291113302111627, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001612431718967855, "signal/frontier_coverage_25/centered_abs_mean": 0.1510842740535736, "signal/frontier_coverage_25/group_std_mean": 0.18920941650867462, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0378778375685215, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021605050656944512, "signal/frontier_coverage_5/centered_abs_mean": 0.1297021821141243, "signal/frontier_coverage_5/group_std_mean": 0.17307354807853698, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.032162808999419215, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018547413172200322, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32503774762153625, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38833544254302976, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.570234090089798, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032503775879740716, "step": 865 }, { "calibration/aurc": 0.10472881819001853, "calibration/batch_distribution_entropy": 0.9079796069299875, "calibration/buffer_distribution_entropy": 0.9751814829986106, "calibration/confidence_entropy": 0.49213627310384805, "calibration/coverage@0%": 0.11510416666666667, "calibration/coverage@1%": 0.14114583333333333, "calibration/coverage@10%": 0.5958333333333334, "calibration/coverage@15%": 0.8401041666666667, "calibration/coverage@20%": 0.89375, "calibration/coverage@25%": 0.9239583333333334, "calibration/coverage@30%": 0.953125, "calibration/coverage@5%": 0.34843750000000007, "calibration/ece": 0.14281713541666663, "calibration/mean_confidence": 0.6558258854166668, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003211805555555558, "completions/max_length": 4017.2, "completions/max_terminated_length": 4017.2, "completions/mean_length": 1390.4245849609374, "completions/mean_terminated_length": 1394.8662841796875, "completions/min_length": 0.0, "completions/min_terminated_length": 417.4, "epoch": 2.09119886001425, "grad_norm": 0.002695485483855009, "learning_rate": 1.0216346153846154e-06, "loss": -0.0103, "num_tokens": 2483978098.0, "reward": 1.0271677494049072, "reward_std": 0.09729326367378235, "rewards/accuracy_reward": 0.7520833253860474, "rewards/brier_reward": 0.8424885869026184, "rewards/confidence_uniqueness_reward": 0.9420551657676697, "rewards/format_reward": 0.9967881917953492, "rewards/frontier_coverage_0": 0.010245061293244363, "rewards/frontier_coverage_1": 0.010245061293244363, "rewards/frontier_coverage_10": 0.049554044008255006, "rewards/frontier_coverage_15": 0.11662421822547912, "rewards/frontier_coverage_20": 0.19967409074306489, "rewards/frontier_coverage_25": 0.3006702125072479, "rewards/frontier_coverage_5": 0.010480654053390026, "rewards/frontier_entropy_batch_reward": -0.35696548223495483, "signal/accuracy_reward/centered_abs_mean": 0.10629340261220932, "signal/accuracy_reward/group_std_mean": 0.14241406172513962, "signal/accuracy_reward/group_zero_std_frac": 0.5861111283302307, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9110217213630676, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05314670130610466, "signal/advantage_abs_mean": 0.7621482253074646, "signal/advantage_pre_scale_abs_mean": 0.07348197922110558, "signal/advantage_pre_scale_std": 0.12210540175437927, "signal/advantage_std": 0.9827365040779114, "signal/brier_reward/centered_abs_mean": 0.1055911734700203, "signal/brier_reward/group_std_mean": 0.13631585836410523, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18323537707328796, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01055911760777235, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02010197788476944, "signal/confidence_uniqueness_reward/group_std_mean": 0.02892959825694561, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035044122114777566, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002010197751224041, "signal/format_reward/centered_abs_mean": 0.005463324673473835, "signal/format_reward/group_std_mean": 0.010764547064900399, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04613239541649818, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0027316623367369176, "signal/frontier_coverage_0/centered_abs_mean": 0.13712626695632935, "signal/frontier_coverage_0/group_std_mean": 0.17697598934173583, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03414354957640171, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019609056878834965, "signal/frontier_coverage_1/centered_abs_mean": 0.13712626695632935, "signal/frontier_coverage_1/group_std_mean": 0.17697598934173583, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03414354957640171, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019609056878834965, "signal/frontier_coverage_10/centered_abs_mean": 0.058862689137458804, "signal/frontier_coverage_10/group_std_mean": 0.07372922748327256, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014756158180534839, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008417364791966975, "signal/frontier_coverage_15/centered_abs_mean": 0.08503047823905945, "signal/frontier_coverage_15/group_std_mean": 0.1047013595700264, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02136564515531063, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012159358710050582, "signal/frontier_coverage_20/centered_abs_mean": 0.12002795040607453, "signal/frontier_coverage_20/group_std_mean": 0.14857746958732604, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030121758580207825, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001716399728320539, "signal/frontier_coverage_25/centered_abs_mean": 0.1620142638683319, "signal/frontier_coverage_25/group_std_mean": 0.2014380544424057, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04057658687233925, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002316803904250264, "signal/frontier_coverage_5/centered_abs_mean": 0.13623161017894744, "signal/frontier_coverage_5/group_std_mean": 0.17587542831897734, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03392289765179157, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019481121795251965, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3304478108882904, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3938392698764801, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.579087895154953, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03304478265345097, "step": 870 }, { "calibration/aurc": 0.14662707262516228, "calibration/batch_distribution_entropy": 0.9622453671594036, "calibration/buffer_distribution_entropy": 0.9745117786630301, "calibration/confidence_entropy": 0.4938231503236358, "calibration/coverage@0%": 0.10117493472584856, "calibration/coverage@1%": 0.14234116623150567, "calibration/coverage@10%": 0.4209747606614448, "calibration/coverage@15%": 0.5815246953872932, "calibration/coverage@20%": 0.6905080504786771, "calibration/coverage@25%": 0.757797541340296, "calibration/coverage@30%": 0.9203125, "calibration/coverage@5%": 0.2972734442993908, "calibration/ece": 0.1510147796997389, "calibration/mean_confidence": 0.5970026253807659, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002083333333333326, "completions/max_length": 3938.4, "completions/max_terminated_length": 3938.4, "completions/mean_length": 1414.036474609375, "completions/mean_terminated_length": 1416.9802734375, "completions/min_length": 0.0, "completions/min_terminated_length": 449.4, "epoch": 2.103198710016125, "grad_norm": 0.002670974237844348, "learning_rate": 9.915865384615386e-07, "loss": -0.0005, "num_tokens": 2503373014.0, "reward": 1.0179281949996948, "reward_std": 0.0986163780093193, "rewards/accuracy_reward": 0.7246527791023254, "rewards/brier_reward": 0.8295908212661743, "rewards/confidence_uniqueness_reward": 0.9478270053863526, "rewards/format_reward": 0.9979166626930237, "rewards/frontier_coverage_0": 0.01796838641166687, "rewards/frontier_coverage_1": 0.01796838641166687, "rewards/frontier_coverage_10": 0.05329090356826782, "rewards/frontier_coverage_15": 0.10762108713388444, "rewards/frontier_coverage_20": 0.1816571831703186, "rewards/frontier_coverage_25": 0.2727284550666809, "rewards/frontier_coverage_5": 0.018276363145560026, "rewards/frontier_entropy_batch_reward": -0.3067232221364975, "signal/accuracy_reward/centered_abs_mean": 0.11227213442325593, "signal/accuracy_reward/group_std_mean": 0.1481925517320633, "signal/accuracy_reward/group_zero_std_frac": 0.575, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9142273187637329, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05613606721162796, "signal/advantage_abs_mean": 0.7798005938529968, "signal/advantage_pre_scale_abs_mean": 0.07608538419008255, "signal/advantage_pre_scale_std": 0.12115374058485032, "signal/advantage_std": 0.9828497409820557, "signal/brier_reward/centered_abs_mean": 0.10694562196731568, "signal/brier_reward/group_std_mean": 0.13694375157356262, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17493544220924379, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010694562830030918, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01673703547567129, "signal/confidence_uniqueness_reward/group_std_mean": 0.023892204836010934, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.027389925345778464, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.001673703594133258, "signal/format_reward/centered_abs_mean": 0.0036566840135492384, "signal/format_reward/group_std_mean": 0.007749038189649582, "signal/format_reward/group_zero_std_frac": 0.9638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.029558508843183517, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0018283420067746192, "signal/frontier_coverage_0/centered_abs_mean": 0.1515151709318161, "signal/frontier_coverage_0/group_std_mean": 0.19341229498386384, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03542120829224586, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.00216666697524488, "signal/frontier_coverage_1/centered_abs_mean": 0.1515151709318161, "signal/frontier_coverage_1/group_std_mean": 0.19341229498386384, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03542120829224586, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.00216666697524488, "signal/frontier_coverage_10/centered_abs_mean": 0.060925094038248064, "signal/frontier_coverage_10/group_std_mean": 0.07617206424474716, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014272183738648891, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000871228810865432, "signal/frontier_coverage_15/centered_abs_mean": 0.07883169800043106, "signal/frontier_coverage_15/group_std_mean": 0.09689974635839463, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.018499715998768807, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011272932635620237, "signal/frontier_coverage_20/centered_abs_mean": 0.10982066988945008, "signal/frontier_coverage_20/group_std_mean": 0.13585858047008514, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.025753576681017874, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001570435566827655, "signal/frontier_coverage_25/centered_abs_mean": 0.14952315390110016, "signal/frontier_coverage_25/group_std_mean": 0.18586367070674897, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03504568859934807, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021381810773164036, "signal/frontier_coverage_5/centered_abs_mean": 0.15073439180850984, "signal/frontier_coverage_5/group_std_mean": 0.19244405031204223, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03523862287402153, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021555018145591022, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33520500659942626, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3998372495174408, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5495693683624268, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03352050185203552, "step": 875 }, { "calibration/aurc": 0.1376710203770225, "calibration/batch_distribution_entropy": 0.9580564845872235, "calibration/buffer_distribution_entropy": 0.9737883093034526, "calibration/confidence_entropy": 0.48958195789736864, "calibration/coverage@0%": 0.11123125504080489, "calibration/coverage@1%": 0.20157068062827227, "calibration/coverage@10%": 0.3906456331250939, "calibration/coverage@15%": 0.7124519841975039, "calibration/coverage@20%": 0.8295517613768404, "calibration/coverage@25%": 0.8598868125709129, "calibration/coverage@30%": 0.8834183150383442, "calibration/coverage@5%": 0.33841810998865396, "calibration/ece": 0.18009319174333246, "calibration/mean_confidence": 0.5862276137465312, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001649305555555558, "completions/max_length": 3926.8, "completions/max_terminated_length": 3926.8, "completions/mean_length": 1418.9532958984375, "completions/mean_terminated_length": 1421.2898193359374, "completions/min_length": 0.0, "completions/min_terminated_length": 428.4, "epoch": 2.115198560018, "grad_norm": 0.002700896468013525, "learning_rate": 9.615384615384617e-07, "loss": -0.0022, "num_tokens": 2522803900.0, "reward": 1.020362961292267, "reward_std": 0.09932073801755906, "rewards/accuracy_reward": 0.7322916746139526, "rewards/brier_reward": 0.8255838513374328, "rewards/confidence_uniqueness_reward": 0.9477863907814026, "rewards/format_reward": 0.9983506917953491, "rewards/frontier_coverage_0": 0.008382726181298494, "rewards/frontier_coverage_1": 0.008382726181298494, "rewards/frontier_coverage_10": 0.045684900134801865, "rewards/frontier_coverage_15": 0.10314829349517822, "rewards/frontier_coverage_20": 0.17775794565677644, "rewards/frontier_coverage_25": 0.2699141949415207, "rewards/frontier_coverage_5": 0.008700376003980636, "rewards/frontier_entropy_batch_reward": -0.3118946313858032, "signal/accuracy_reward/centered_abs_mean": 0.11066623479127884, "signal/accuracy_reward/group_std_mean": 0.15176202952861786, "signal/accuracy_reward/group_zero_std_frac": 0.5472222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9180627942085267, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05533311739563942, "signal/advantage_abs_mean": 0.7546151518821717, "signal/advantage_pre_scale_abs_mean": 0.07361921072006225, "signal/advantage_pre_scale_std": 0.1216355249285698, "signal/advantage_std": 0.9828206658363342, "signal/brier_reward/centered_abs_mean": 0.11196579039096832, "signal/brier_reward/group_std_mean": 0.14342193007469178, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18711092174053193, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011196578480303287, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01612330451607704, "signal/confidence_uniqueness_reward/group_std_mean": 0.024166127666831017, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02694421596825123, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016123305074870586, "signal/format_reward/centered_abs_mean": 0.003152126749046147, "signal/format_reward/group_std_mean": 0.008134117349982262, "signal/format_reward/group_zero_std_frac": 0.9583333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.026590963266789912, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0015760633745230735, "signal/frontier_coverage_0/centered_abs_mean": 0.15225654542446138, "signal/frontier_coverage_0/group_std_mean": 0.1960272341966629, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03628757819533348, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021772684995085, "signal/frontier_coverage_1/centered_abs_mean": 0.15225654542446138, "signal/frontier_coverage_1/group_std_mean": 0.1960272341966629, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03628757819533348, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021772684995085, "signal/frontier_coverage_10/centered_abs_mean": 0.06339073628187179, "signal/frontier_coverage_10/group_std_mean": 0.08014297634363174, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015141036547720432, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009064875310286879, "signal/frontier_coverage_15/centered_abs_mean": 0.08139910399913788, "signal/frontier_coverage_15/group_std_mean": 0.10091938078403473, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019420773163437842, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011640072101727129, "signal/frontier_coverage_20/centered_abs_mean": 0.11484313309192658, "signal/frontier_coverage_20/group_std_mean": 0.14308151602745056, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027388099953532218, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016422567423433065, "signal/frontier_coverage_25/centered_abs_mean": 0.15665629208087922, "signal/frontier_coverage_25/group_std_mean": 0.19622641503810884, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0373531699180603, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022401849273592234, "signal/frontier_coverage_5/centered_abs_mean": 0.15140585005283355, "signal/frontier_coverage_5/group_std_mean": 0.1949646919965744, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03608435168862343, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002165103517472744, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3276274800300598, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39510163068771365, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5458629727363586, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03276274874806404, "step": 880 }, { "calibration/aurc": 0.07528091745904578, "calibration/batch_distribution_entropy": 0.9633578410189617, "calibration/buffer_distribution_entropy": 0.9736194749383638, "calibration/confidence_entropy": 0.4910086738322564, "calibration/coverage@0%": 0.12088364882506528, "calibration/coverage@1%": 0.33416149912967796, "calibration/coverage@10%": 0.6406005221932115, "calibration/coverage@15%": 0.822749401653612, "calibration/coverage@20%": 0.9265217036553525, "calibration/coverage@25%": 0.9723931135770234, "calibration/coverage@30%": 0.9895833333333334, "calibration/coverage@5%": 0.5827417863359444, "calibration/ece": 0.23126799257506522, "calibration/mean_confidence": 0.5690869960019582, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00277777777777779, "completions/max_length": 3843.4, "completions/max_terminated_length": 3843.4, "completions/mean_length": 1369.9625732421875, "completions/mean_terminated_length": 1373.880810546875, "completions/min_length": 0.0, "completions/min_terminated_length": 398.8, "epoch": 2.127198410019875, "grad_norm": 0.0029529957100749016, "learning_rate": 9.314903846153847e-07, "loss": -0.0044, "num_tokens": 2541659533.0, "reward": 1.0487470626831055, "reward_std": 0.08783675283193589, "rewards/accuracy_reward": 0.7880208373069764, "rewards/brier_reward": 0.8365139484405517, "rewards/confidence_uniqueness_reward": 0.9465442538261414, "rewards/format_reward": 0.9972222089767456, "rewards/frontier_coverage_0": -0.01605305355042219, "rewards/frontier_coverage_1": -0.01605305355042219, "rewards/frontier_coverage_10": 0.044044318795204165, "rewards/frontier_coverage_15": 0.11718032211065292, "rewards/frontier_coverage_20": 0.20520202815532684, "rewards/frontier_coverage_25": 0.3133831262588501, "rewards/frontier_coverage_5": -0.015344736352562904, "rewards/frontier_entropy_batch_reward": -0.31223025918006897, "signal/accuracy_reward/centered_abs_mean": 0.09235026091337203, "signal/accuracy_reward/group_std_mean": 0.12444257289171219, "signal/accuracy_reward/group_zero_std_frac": 0.6305555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8251009345054626, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04617513045668602, "signal/advantage_abs_mean": 0.7711875319480896, "signal/advantage_pre_scale_abs_mean": 0.06669707149267197, "signal/advantage_pre_scale_std": 0.10958891659975052, "signal/advantage_std": 0.9827027201652527, "signal/brier_reward/centered_abs_mean": 0.10570832341909409, "signal/brier_reward/group_std_mean": 0.1348507136106491, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18915933072566987, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010570832900702954, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016615297459065915, "signal/confidence_uniqueness_reward/group_std_mean": 0.023423294350504876, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029620739817619323, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016615298809483647, "signal/format_reward/centered_abs_mean": 0.0035047742538154127, "signal/format_reward/group_std_mean": 0.007171806693077087, "signal/format_reward/group_zero_std_frac": 0.9666666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.030849797092378138, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0017523871269077063, "signal/frontier_coverage_0/centered_abs_mean": 0.14871225953102113, "signal/frontier_coverage_0/group_std_mean": 0.18890169262886047, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03809470310807228, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021265852730721234, "signal/frontier_coverage_1/centered_abs_mean": 0.14871225953102113, "signal/frontier_coverage_1/group_std_mean": 0.18890169262886047, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03809470310807228, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021265852730721234, "signal/frontier_coverage_10/centered_abs_mean": 0.061393161118030545, "signal/frontier_coverage_10/group_std_mean": 0.07690578997135163, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01572293322533369, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008779221563600004, "signal/frontier_coverage_15/centered_abs_mean": 0.07879534959793091, "signal/frontier_coverage_15/group_std_mean": 0.09692421555519104, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020150484517216682, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011267734691500664, "signal/frontier_coverage_20/centered_abs_mean": 0.10975634306669235, "signal/frontier_coverage_20/group_std_mean": 0.13552465736865998, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02805260457098484, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001569515699520707, "signal/frontier_coverage_25/centered_abs_mean": 0.14789953231811523, "signal/frontier_coverage_25/group_std_mean": 0.1838620573282242, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03780416175723076, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002114963345229626, "signal/frontier_coverage_5/centered_abs_mean": 0.1476011872291565, "signal/frontier_coverage_5/group_std_mean": 0.1875333845615387, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03781076371669769, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002110696933232248, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32497783899307253, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3905618965625763, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5809773206710815, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03249778635799885, "step": 885 }, { "calibration/aurc": 0.04501724498201738, "calibration/batch_distribution_entropy": 0.9603090893588673, "calibration/buffer_distribution_entropy": 0.9745517804075691, "calibration/confidence_entropy": 0.4651638780880843, "calibration/coverage@0%": 0.2614841710182768, "calibration/coverage@1%": 0.5651300043516102, "calibration/coverage@10%": 0.8623463337684942, "calibration/coverage@15%": 0.9040361183637946, "calibration/coverage@20%": 0.9347979221061792, "calibration/coverage@25%": 0.9624374456048738, "calibration/coverage@30%": 0.9916449086161879, "calibration/coverage@5%": 0.7617901436031331, "calibration/ece": 0.2291900868690165, "calibration/mean_confidence": 0.5784430661444735, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001822916666666674, "completions/max_length": 3955.2, "completions/max_terminated_length": 3955.2, "completions/mean_length": 1485.9614990234375, "completions/mean_terminated_length": 1488.7108154296875, "completions/min_length": 90.8, "completions/min_terminated_length": 420.0, "epoch": 2.13919826002175, "grad_norm": 0.0026017827913165092, "learning_rate": 9.014423076923078e-07, "loss": 0.001, "num_tokens": 2561890065.0, "reward": 1.0419267177581788, "reward_std": 0.0928407445549965, "rewards/accuracy_reward": 0.7695312619209289, "rewards/brier_reward": 0.835858428478241, "rewards/confidence_uniqueness_reward": 0.9476023077964782, "rewards/format_reward": 0.9981770753860474, "rewards/frontier_coverage_0": 0.0017650447785854339, "rewards/frontier_coverage_1": 0.0017650447785854339, "rewards/frontier_coverage_10": 0.05538591891527176, "rewards/frontier_coverage_15": 0.12445316910743713, "rewards/frontier_coverage_20": 0.21158009469509126, "rewards/frontier_coverage_25": 0.3179014027118683, "rewards/frontier_coverage_5": 0.0021275728940963745, "rewards/frontier_entropy_batch_reward": -0.3049774348735809, "signal/accuracy_reward/centered_abs_mean": 0.10240342766046524, "signal/accuracy_reward/group_std_mean": 0.1418137162923813, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.913576877117157, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05120171383023262, "signal/advantage_abs_mean": 0.7645326375961303, "signal/advantage_pre_scale_abs_mean": 0.06943797469139099, "signal/advantage_pre_scale_std": 0.11579804867506027, "signal/advantage_std": 0.9827001333236695, "signal/brier_reward/centered_abs_mean": 0.10340909659862518, "signal/brier_reward/group_std_mean": 0.13372913300991057, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18566021621227263, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010340910032391548, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017535041272640228, "signal/confidence_uniqueness_reward/group_std_mean": 0.024436182528734206, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0314997099339962, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017535041086375713, "signal/format_reward/centered_abs_mean": 0.003293185739312321, "signal/format_reward/group_std_mean": 0.006819825246930122, "signal/format_reward/group_zero_std_frac": 0.9694444417953492, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.02991781122982502, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0016465928696561606, "signal/frontier_coverage_0/centered_abs_mean": 0.14981991052627563, "signal/frontier_coverage_0/group_std_mean": 0.19636679589748382, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03842330724000931, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002142424648627639, "signal/frontier_coverage_1/centered_abs_mean": 0.14981991052627563, "signal/frontier_coverage_1/group_std_mean": 0.19636679589748382, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03842330724000931, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002142424648627639, "signal/frontier_coverage_10/centered_abs_mean": 0.06453083753585816, "signal/frontier_coverage_10/group_std_mean": 0.080882328748703, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01652641948312521, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009227909496985376, "signal/frontier_coverage_15/centered_abs_mean": 0.08124902099370956, "signal/frontier_coverage_15/group_std_mean": 0.09994722455739975, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020808987319469452, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011618610005825758, "signal/frontier_coverage_20/centered_abs_mean": 0.11021952629089356, "signal/frontier_coverage_20/group_std_mean": 0.13671945929527282, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028233184292912484, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001576139172539115, "signal/frontier_coverage_25/centered_abs_mean": 0.1474437177181244, "signal/frontier_coverage_25/group_std_mean": 0.18446506559848785, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03775979653000831, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0021084450650960206, "signal/frontier_coverage_5/centered_abs_mean": 0.14860370755195618, "signal/frontier_coverage_5/group_std_mean": 0.19485906660556793, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03811139240860939, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021250330843031406, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33459954857826235, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3981178283691406, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5996464252471924, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03345995619893074, "step": 890 }, { "calibration/aurc": 0.1280523015671718, "calibration/batch_distribution_entropy": 0.9510907337880019, "calibration/buffer_distribution_entropy": 0.974771002805943, "calibration/confidence_entropy": 0.48342940663851514, "calibration/coverage@0%": 0.049491405570060924, "calibration/coverage@1%": 0.049491405570060924, "calibration/coverage@10%": 0.38817721932114885, "calibration/coverage@15%": 0.6707218233246302, "calibration/coverage@20%": 0.8403707027850305, "calibration/coverage@25%": 0.9280134899912967, "calibration/coverage@30%": 0.9958333333333333, "calibration/coverage@5%": 0.2808447563098347, "calibration/ece": 0.18869512305537425, "calibration/mean_confidence": 0.5883434234796562, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021701388888889063, "completions/max_length": 3923.2, "completions/max_terminated_length": 3923.2, "completions/mean_length": 1463.28515625, "completions/mean_terminated_length": 1466.5015625, "completions/min_length": 0.0, "completions/min_terminated_length": 335.2, "epoch": 2.151198110023625, "grad_norm": 0.0026420399080961943, "learning_rate": 8.713942307692308e-07, "loss": -0.0033, "num_tokens": 2581828038.0, "reward": 1.026082420349121, "reward_std": 0.09336404502391815, "rewards/accuracy_reward": 0.7427951574325562, "rewards/brier_reward": 0.8260280966758728, "rewards/confidence_uniqueness_reward": 0.9466118693351746, "rewards/format_reward": 0.9978298664093017, "rewards/frontier_coverage_0": 0.0059367487207055095, "rewards/frontier_coverage_1": 0.0059367487207055095, "rewards/frontier_coverage_10": 0.05136653557419777, "rewards/frontier_coverage_15": 0.11881858706474305, "rewards/frontier_coverage_20": 0.20078957080841064, "rewards/frontier_coverage_25": 0.2984639883041382, "rewards/frontier_coverage_5": 0.006314245797693729, "rewards/frontier_entropy_batch_reward": -0.31327160596847536, "signal/accuracy_reward/centered_abs_mean": 0.10116644948720932, "signal/accuracy_reward/group_std_mean": 0.14085038453340532, "signal/accuracy_reward/group_zero_std_frac": 0.5722222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8828286409378052, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05058322474360466, "signal/advantage_abs_mean": 0.7592063546180725, "signal/advantage_pre_scale_abs_mean": 0.06951456665992736, "signal/advantage_pre_scale_std": 0.11602886617183686, "signal/advantage_std": 0.982738447189331, "signal/brier_reward/centered_abs_mean": 0.1086281344294548, "signal/brier_reward/group_std_mean": 0.14015234708786012, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18977911174297332, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010862813144922257, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01798480302095413, "signal/confidence_uniqueness_reward/group_std_mean": 0.02583237551152706, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031481166183948514, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017984803766012193, "signal/format_reward/centered_abs_mean": 0.003965928812976927, "signal/format_reward/group_std_mean": 0.008585151471197604, "signal/format_reward/group_zero_std_frac": 0.9611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.034098946023732424, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0019829644064884634, "signal/frontier_coverage_0/centered_abs_mean": 0.15116261839866638, "signal/frontier_coverage_0/group_std_mean": 0.1969261050224304, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.037714557349681856, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021616254933178427, "signal/frontier_coverage_1/centered_abs_mean": 0.15116261839866638, "signal/frontier_coverage_1/group_std_mean": 0.1969261050224304, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.037714557349681856, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021616254933178427, "signal/frontier_coverage_10/centered_abs_mean": 0.064113200455904, "signal/frontier_coverage_10/group_std_mean": 0.07983765006065369, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01604436244815588, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009168186923488974, "signal/frontier_coverage_15/centered_abs_mean": 0.0844537153840065, "signal/frontier_coverage_15/group_std_mean": 0.10395829975605012, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021203552559018134, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001207688101567328, "signal/frontier_coverage_20/centered_abs_mean": 0.1167292207479477, "signal/frontier_coverage_20/group_std_mean": 0.1449556201696396, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.029318232834339143, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016692279605194926, "signal/frontier_coverage_25/centered_abs_mean": 0.15577602088451387, "signal/frontier_coverage_25/group_std_mean": 0.1945989966392517, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.039113936573266984, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002227597124874592, "signal/frontier_coverage_5/centered_abs_mean": 0.15000051259994507, "signal/frontier_coverage_5/group_std_mean": 0.19544619619846343, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03742243126034737, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002145007345825434, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32618371248245237, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3917438447475433, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5712901592254639, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03261837288737297, "step": 895 }, { "calibration/aurc": 0.17971495000616086, "calibration/batch_distribution_entropy": 0.9387109815779631, "calibration/buffer_distribution_entropy": 0.975423395766124, "calibration/confidence_entropy": 0.47164422241919307, "calibration/coverage@0%": 0.016154013961605585, "calibration/coverage@1%": 0.016154013961605585, "calibration/coverage@10%": 0.33542484729493893, "calibration/coverage@15%": 0.39011234729493893, "calibration/coverage@20%": 0.6381653577661431, "calibration/coverage@25%": 0.8498009380453752, "calibration/coverage@30%": 0.9270015270506107, "calibration/coverage@5%": 0.016154013961605585, "calibration/ece": 0.17659054720222508, "calibration/mean_confidence": 0.5827529517615619, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0028645833333333258, "completions/max_length": 4001.0, "completions/max_terminated_length": 4001.0, "completions/mean_length": 1518.630029296875, "completions/mean_terminated_length": 1523.0183837890625, "completions/min_length": 0.0, "completions/min_terminated_length": 468.6, "epoch": 2.1631979600255, "grad_norm": 0.0024768190924078226, "learning_rate": 8.41346153846154e-07, "loss": -0.0054, "num_tokens": 2602411456.0, "reward": 1.0215399146080018, "reward_std": 0.09830788671970367, "rewards/accuracy_reward": 0.7322048544883728, "rewards/brier_reward": 0.8238555550575256, "rewards/confidence_uniqueness_reward": 0.9468532681465149, "rewards/format_reward": 0.9971354246139527, "rewards/frontier_coverage_0": 0.011091101169586181, "rewards/frontier_coverage_1": 0.011091101169586181, "rewards/frontier_coverage_10": 0.05177242755889892, "rewards/frontier_coverage_15": 0.11416967511177063, "rewards/frontier_coverage_20": 0.1915341079235077, "rewards/frontier_coverage_25": 0.2842023193836212, "rewards/frontier_coverage_5": 0.011335279606282712, "rewards/frontier_entropy_batch_reward": -0.2985638976097107, "signal/accuracy_reward/centered_abs_mean": 0.10755750834941864, "signal/accuracy_reward/group_std_mean": 0.1495337277650833, "signal/accuracy_reward/group_zero_std_frac": 0.547222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8794362545013428, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05377875417470932, "signal/advantage_abs_mean": 0.7575773119926452, "signal/advantage_pre_scale_abs_mean": 0.07303174883127213, "signal/advantage_pre_scale_std": 0.12083332985639572, "signal/advantage_std": 0.9828310012817383, "signal/brier_reward/centered_abs_mean": 0.11027123332023621, "signal/brier_reward/group_std_mean": 0.14200958609580994, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18250376284122466, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011027123592793942, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018133307434618474, "signal/confidence_uniqueness_reward/group_std_mean": 0.026324766129255293, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03011079877614975, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018133309204131365, "signal/format_reward/centered_abs_mean": 0.004638671898283065, "signal/format_reward/group_std_mean": 0.00965967532247305, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.03860214501619339, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0023193359491415324, "signal/frontier_coverage_0/centered_abs_mean": 0.15348501801490783, "signal/frontier_coverage_0/group_std_mean": 0.20067269504070281, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036319942027330396, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002194835641421378, "signal/frontier_coverage_1/centered_abs_mean": 0.15348501801490783, "signal/frontier_coverage_1/group_std_mean": 0.20067269504070281, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036319942027330396, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002194835641421378, "signal/frontier_coverage_10/centered_abs_mean": 0.06233002617955208, "signal/frontier_coverage_10/group_std_mean": 0.078104929625988, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014825647883117198, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000891319359652698, "signal/frontier_coverage_15/centered_abs_mean": 0.08362076282501221, "signal/frontier_coverage_15/group_std_mean": 0.1029461145401001, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01982920467853546, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011957768816500901, "signal/frontier_coverage_20/centered_abs_mean": 0.11619037836790085, "signal/frontier_coverage_20/group_std_mean": 0.1436010032892227, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027482646331191064, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001661522383801639, "signal/frontier_coverage_25/centered_abs_mean": 0.15632675886154174, "signal/frontier_coverage_25/group_std_mean": 0.1943357616662979, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03692755475640297, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022354727145284413, "signal/frontier_coverage_5/centered_abs_mean": 0.1523052781820297, "signal/frontier_coverage_5/group_std_mean": 0.19916028082370757, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.036041303724050525, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021779653849080204, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3256483495235443, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39229129552841185, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5407821238040924, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03256483599543571, "step": 900 }, { "epoch": 2.1631979600255, "eval_calibration/aurc": 0.12737566735071082, "eval_calibration/batch_distribution_entropy": 0.9176665914052275, "eval_calibration/buffer_distribution_entropy": 0.9754314783079789, "eval_calibration/confidence_entropy": 0.4755197109213362, "eval_calibration/coverage@0%": 0.32577284946236557, "eval_calibration/coverage@1%": 0.32577284946236557, "eval_calibration/coverage@10%": 0.46169354838709675, "eval_calibration/coverage@15%": 0.5354502688172044, "eval_calibration/coverage@20%": 0.8802083333333334, "eval_calibration/coverage@25%": 0.9427083333333334, "eval_calibration/coverage@30%": 1.0, "eval_calibration/coverage@5%": 0.34139784946236557, "eval_calibration/ece": 0.2249175739247312, "eval_calibration/mean_confidence": 0.5708137432795698, "eval_completions/clipped_ratio": 0.0017361111111111234, "eval_completions/max_length": 3592.1666666666665, "eval_completions/max_terminated_length": 3592.1666666666665, "eval_completions/mean_length": 1524.2620035807292, "eval_completions/mean_terminated_length": 1526.8823852539062, "eval_completions/min_length": 392.8333333333333, "eval_completions/min_terminated_length": 567.8333333333334, "eval_loss": 0.0, "eval_num_tokens": 2602411456.0, "eval_reward": 0.9374911387761434, "eval_reward_std": 0.22820752362410227, "eval_rewards/accuracy_reward": 0.7118055522441864, "eval_rewards/brier_reward": 0.8265486260255178, "eval_rewards/confidence_uniqueness_reward": 0.8966138859589895, "eval_rewards/format_reward": 0.9973958432674408, "eval_rewards/frontier_coverage_0": 0.02534069788331787, "eval_rewards/frontier_coverage_1": 0.02534069788331787, "eval_rewards/frontier_coverage_10": 0.05668467034896215, "eval_rewards/frontier_coverage_15": 0.11529384429256122, "eval_rewards/frontier_coverage_20": 0.1911569188038508, "eval_rewards/frontier_coverage_25": 0.2818978354334831, "eval_rewards/frontier_coverage_5": 0.02552862201506893, "eval_rewards/frontier_entropy_batch_reward": -0.9973958432674408, "eval_runtime": 207.3445, "eval_samples_per_second": 4.823, "eval_signal/accuracy_reward/centered_abs_mean": 0.3916015625, "eval_signal/accuracy_reward/group_std_mean": 0.4472481807072957, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.867256224155426, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19580078125, "eval_signal/advantage_abs_mean": 0.8500007092952728, "eval_signal/advantage_pre_scale_abs_mean": 0.19565576066573462, "eval_signal/advantage_pre_scale_std": 0.22633356104294458, "eval_signal/advantage_std": 0.9863857130209605, "eval_signal/brier_reward/centered_abs_mean": 0.16644690930843353, "eval_signal/brier_reward/group_std_mean": 0.22046558558940887, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07374625280499458, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.016644690961887438, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.042304361859957375, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.05535396312673887, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01876258881141742, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004230436325694124, "eval_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/format_reward/group_std_mean": 0.014731391333043575, "eval_signal/format_reward/group_zero_std_frac": 0.9166666865348816, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.010851632803678513, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2662147382895152, "eval_signal/frontier_coverage_0/group_std_mean": 0.3709094375371933, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016928008447090786, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003806870896369219, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2662147382895152, "eval_signal/frontier_coverage_1/group_std_mean": 0.3709094375371933, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016928008447090786, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003806870896369219, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.08367854605118434, "eval_signal/frontier_coverage_10/group_std_mean": 0.11023158207535744, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005312102691580852, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011966032131264608, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.12918043757478395, "eval_signal/frontier_coverage_15/group_std_mean": 0.16170358409484228, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00819395606716474, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018472802476026118, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.21566026906172434, "eval_signal/frontier_coverage_20/group_std_mean": 0.2624283855160077, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013669513786832491, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0030839417595416307, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.31325619916121167, "eval_signal/frontier_coverage_25/group_std_mean": 0.3792371451854706, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.019848248300453026, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004479563406979044, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2640439545114835, "eval_signal/frontier_coverage_5/group_std_mean": 0.368171289563179, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.01678965923686822, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00377582855677853, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0021703265762577453, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.000504557314949731, "eval_steps_per_second": 0.029, "step": 900 }, { "epoch": 2.1631979600255, "step": 900, "train_probe_calibration/aurc": 0.10339707316454276, "train_probe_calibration/batch_distribution_entropy": 0.9102197374475085, "train_probe_calibration/buffer_distribution_entropy": 0.9751232554999216, "train_probe_calibration/confidence_entropy": 0.4694546862543447, "train_probe_calibration/coverage@0%": 0.35668682795698925, "train_probe_calibration/coverage@1%": 0.35668682795698925, "train_probe_calibration/coverage@10%": 0.5608198924731183, "train_probe_calibration/coverage@15%": 0.7224462365591399, "train_probe_calibration/coverage@20%": 0.8996975806451614, "train_probe_calibration/coverage@25%": 0.9519489247311829, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.43565188172043007, "train_probe_calibration/ece": 0.23572354166666673, "train_probe_calibration/mean_confidence": 0.6060012567204301, "train_probe_completions/clipped_ratio": 0.002604166666666685, "train_probe_completions/max_length": 3242.0, "train_probe_completions/max_terminated_length": 3242.0, "train_probe_completions/mean_length": 1525.1625366210938, "train_probe_completions/mean_terminated_length": 1529.1767578125, "train_probe_completions/min_length": 219.16666666666666, "train_probe_completions/min_terminated_length": 446.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 2602411456.0, "train_probe_reward": 0.9594915211200714, "train_probe_reward_std": 0.21700799961884817, "train_probe_rewards/accuracy_reward": 0.7560763955116272, "train_probe_rewards/brier_reward": 0.8315047522385915, "train_probe_rewards/confidence_uniqueness_reward": 0.8951753179232279, "train_probe_rewards/format_reward": 0.9973958432674408, "train_probe_rewards/frontier_coverage_0": 0.003199717883641521, "train_probe_rewards/frontier_coverage_1": 0.003199717883641521, "train_probe_rewards/frontier_coverage_10": 0.0520126453290383, "train_probe_rewards/frontier_coverage_15": 0.11875824133555095, "train_probe_rewards/frontier_coverage_20": 0.20274977634350458, "train_probe_rewards/frontier_coverage_25": 0.303492138783137, "train_probe_rewards/frontier_coverage_5": 0.003787370825496813, "train_probe_rewards/frontier_entropy_batch_reward": -0.9973958432674408, "train_probe_runtime": 195.4243, "train_probe_samples_per_second": 5.117, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3585611979166667, "train_probe_signal/accuracy_reward/group_std_mean": 0.4281422396500905, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8391776780287424, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17928059895833334, "train_probe_signal/advantage_abs_mean": 0.8138086001078287, "train_probe_signal/advantage_pre_scale_abs_mean": 0.1782543882727623, "train_probe_signal/advantage_pre_scale_std": 0.2158868486682574, "train_probe_signal/advantage_std": 0.9863618016242981, "train_probe_signal/brier_reward/centered_abs_mean": 0.16066461553176245, "train_probe_signal/brier_reward/group_std_mean": 0.22043094535668692, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07519176974892616, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.016066461335867643, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04383396108945211, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.057254182174801826, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02052529404560725, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004383396008051932, "train_probe_signal/format_reward/centered_abs_mean": 0.0050455727614462376, "train_probe_signal/format_reward/group_std_mean": 0.014731391333043575, "train_probe_signal/format_reward/group_zero_std_frac": 0.9166666865348816, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.011859034498532614, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0025227863807231188, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.251843864719073, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.37152427931626636, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01686274539679289, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036013672749201455, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.251843864719073, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.37152427931626636, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01686274539679289, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0036013672749201455, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.08020331958929698, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.10853784407178561, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005366942146793008, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0011469074330913525, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.12478353704015414, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.1562705859541893, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.008351171389222145, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0017844046621272962, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.20546038200457892, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.25083090364933014, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.013755069114267826, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0029380834894254804, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.29606155057748157, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.3606761296590169, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.01982360954085986, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004233680199831724, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.2498022640744845, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3688337703545888, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016726200158397358, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003572172368876636, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.0050455727614462376, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.014731391333043575, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666865348816, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0023718070394049087, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0005045572761446238, "train_probe_steps_per_second": 0.031 }, { "calibration/aurc": 0.11625473761732023, "calibration/batch_distribution_entropy": 0.9375525909506154, "calibration/buffer_distribution_entropy": 0.9747469529937035, "calibration/confidence_entropy": 0.5016573158140087, "calibration/coverage@0%": 0.07990803583054076, "calibration/coverage@1%": 0.14413780084359556, "calibration/coverage@10%": 0.5565305096694833, "calibration/coverage@15%": 0.6223219716048876, "calibration/coverage@20%": 0.7940382941688425, "calibration/coverage@25%": 0.8649776979982594, "calibration/coverage@30%": 0.928077404264578, "calibration/coverage@5%": 0.5094944015850825, "calibration/ece": 0.18248357807338889, "calibration/mean_confidence": 0.6210023286462509, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004600694444444442, "completions/max_length": 4064.8, "completions/max_terminated_length": 4064.8, "completions/mean_length": 1550.9193603515625, "completions/mean_terminated_length": 1558.0166259765624, "completions/min_length": 0.0, "completions/min_terminated_length": 433.2, "epoch": 2.175197810027375, "grad_norm": 0.0024279081262648106, "learning_rate": 8.11298076923077e-07, "loss": -0.0141, "num_tokens": 2623369151.0, "reward": 1.0176831007003784, "reward_std": 0.10145644396543503, "rewards/accuracy_reward": 0.7294270753860473, "rewards/brier_reward": 0.828159236907959, "rewards/confidence_uniqueness_reward": 0.9443735361099244, "rewards/format_reward": 0.9953992962837219, "rewards/frontier_coverage_0": 0.015256157889962197, "rewards/frontier_coverage_1": 0.015256157889962197, "rewards/frontier_coverage_10": 0.05224640518426895, "rewards/frontier_coverage_15": 0.11739680767059327, "rewards/frontier_coverage_20": 0.1974783331155777, "rewards/frontier_coverage_25": 0.2923546195030212, "rewards/frontier_coverage_5": 0.015459264814853668, "rewards/frontier_entropy_batch_reward": -0.32071307897567747, "signal/accuracy_reward/centered_abs_mean": 0.11327582597732544, "signal/accuracy_reward/group_std_mean": 0.1501081556081772, "signal/accuracy_reward/group_zero_std_frac": 0.5722222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9572094082832336, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05663791298866272, "signal/advantage_abs_mean": 0.7761594295501709, "signal/advantage_pre_scale_abs_mean": 0.07832107692956924, "signal/advantage_pre_scale_std": 0.12819174826145172, "signal/advantage_std": 0.9827957510948181, "signal/brier_reward/centered_abs_mean": 0.1068603053689003, "signal/brier_reward/group_std_mean": 0.1386454313993454, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18077919483184815, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01068603079766035, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020939309895038605, "signal/confidence_uniqueness_reward/group_std_mean": 0.0292234193533659, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03532315455377102, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020939309615641832, "signal/format_reward/centered_abs_mean": 0.00721028633415699, "signal/format_reward/group_std_mean": 0.012249564565718175, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06003017425537109, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003605143167078495, "signal/frontier_coverage_0/centered_abs_mean": 0.14778611361980437, "signal/frontier_coverage_0/group_std_mean": 0.19523155093193054, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0357908271253109, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002113341446965933, "signal/frontier_coverage_1/centered_abs_mean": 0.14778611361980437, "signal/frontier_coverage_1/group_std_mean": 0.19523155093193054, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0357908271253109, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002113341446965933, "signal/frontier_coverage_10/centered_abs_mean": 0.05996151715517044, "signal/frontier_coverage_10/group_std_mean": 0.07582777291536331, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.014531717076897622, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008574496838264168, "signal/frontier_coverage_15/centered_abs_mean": 0.08238778412342071, "signal/frontier_coverage_15/group_std_mean": 0.10163151174783706, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.01998242549598217, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011781453620642423, "signal/frontier_coverage_20/centered_abs_mean": 0.11567231565713883, "signal/frontier_coverage_20/group_std_mean": 0.14291902482509614, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028052419424057007, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001654114224947989, "signal/frontier_coverage_25/centered_abs_mean": 0.15648081600666047, "signal/frontier_coverage_25/group_std_mean": 0.19369642734527587, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037944577634334564, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002237675618380308, "signal/frontier_coverage_5/centered_abs_mean": 0.14658704698085784, "signal/frontier_coverage_5/group_std_mean": 0.19370121657848358, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03550057634711266, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020961946807801723, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33480802178382874, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40186876654624937, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5686902463436126, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033480801805853846, "step": 905 }, { "calibration/aurc": 0.0762505990821146, "calibration/batch_distribution_entropy": 0.9448920258160249, "calibration/buffer_distribution_entropy": 0.9749763470687652, "calibration/confidence_entropy": 0.4879323731747208, "calibration/coverage@0%": 0.17987303493149745, "calibration/coverage@1%": 0.23371247646727406, "calibration/coverage@10%": 0.676308814637699, "calibration/coverage@15%": 0.8841887565101484, "calibration/coverage@20%": 0.9659603413447664, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5516467755012299, "calibration/ece": 0.23416966717296384, "calibration/mean_confidence": 0.6035169900806124, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0038194444444444643, "completions/max_length": 4006.2, "completions/max_terminated_length": 4006.2, "completions/mean_length": 1551.52646484375, "completions/mean_terminated_length": 1557.5523193359375, "completions/min_length": 0.0, "completions/min_terminated_length": 500.4, "epoch": 2.1871976600292498, "grad_norm": 0.002537149004638195, "learning_rate": 7.8125e-07, "loss": -0.006, "num_tokens": 2644336304.0, "reward": 1.0250213384628295, "reward_std": 0.09622730314731598, "rewards/accuracy_reward": 0.7448784708976746, "rewards/brier_reward": 0.8241114974021911, "rewards/confidence_uniqueness_reward": 0.9452796816825867, "rewards/format_reward": 0.9961805462837219, "rewards/frontier_coverage_0": 0.003916370496153831, "rewards/frontier_coverage_1": 0.003916370496153831, "rewards/frontier_coverage_10": 0.05034622177481651, "rewards/frontier_coverage_15": 0.11719416230916976, "rewards/frontier_coverage_20": 0.19946504831314088, "rewards/frontier_coverage_25": 0.2970153570175171, "rewards/frontier_coverage_5": 0.004244742169976235, "rewards/frontier_entropy_batch_reward": -0.32115537524223325, "signal/accuracy_reward/centered_abs_mean": 0.10778537392616272, "signal/accuracy_reward/group_std_mean": 0.14330336451530457, "signal/accuracy_reward/group_zero_std_frac": 0.5888888835906982, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9045879483222962, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05389268696308136, "signal/advantage_abs_mean": 0.7803467512130737, "signal/advantage_pre_scale_abs_mean": 0.07378631830215454, "signal/advantage_pre_scale_std": 0.12085007727146149, "signal/advantage_std": 0.9827847957611084, "signal/brier_reward/centered_abs_mean": 0.1151393249630928, "signal/brier_reward/group_std_mean": 0.14573339223861695, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1954524338245392, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011513932794332504, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018627097085118293, "signal/confidence_uniqueness_reward/group_std_mean": 0.02717142626643181, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03194965198636055, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018627098063006998, "signal/format_reward/centered_abs_mean": 0.005013020779006183, "signal/format_reward/group_std_mean": 0.010137713141739368, "signal/format_reward/group_zero_std_frac": 0.955555546283722, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.043760602921247484, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0025065103895030917, "signal/frontier_coverage_0/centered_abs_mean": 0.1600848525762558, "signal/frontier_coverage_0/group_std_mean": 0.2041901171207428, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03885265812277794, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0022892132867127655, "signal/frontier_coverage_1/centered_abs_mean": 0.1600848525762558, "signal/frontier_coverage_1/group_std_mean": 0.2041901171207428, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03885265812277794, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0022892132867127655, "signal/frontier_coverage_10/centered_abs_mean": 0.0670699842274189, "signal/frontier_coverage_10/group_std_mean": 0.08348270207643509, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01628856398165226, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009591008070856333, "signal/frontier_coverage_15/centered_abs_mean": 0.08440963327884674, "signal/frontier_coverage_15/group_std_mean": 0.10362496972084045, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020565735176205635, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012070577591657638, "signal/frontier_coverage_20/centered_abs_mean": 0.1160733938217163, "signal/frontier_coverage_20/group_std_mean": 0.143232861161232, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028256673365831375, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001659849495626986, "signal/frontier_coverage_25/centered_abs_mean": 0.15518866181373597, "signal/frontier_coverage_25/group_std_mean": 0.1923240453004837, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037742793560028076, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022191978525370358, "signal/frontier_coverage_5/centered_abs_mean": 0.15902412235736846, "signal/frontier_coverage_5/group_std_mean": 0.20288212597370148, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.038597740978002545, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002274044952355325, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3329595446586609, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39746089577674865, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5680349946022034, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033295954763889316, "step": 910 }, { "calibration/aurc": 0.08763255499847859, "calibration/batch_distribution_entropy": 0.9593239357381013, "calibration/buffer_distribution_entropy": 0.9749175365346063, "calibration/confidence_entropy": 0.4850151497736473, "calibration/coverage@0%": 0.1297758230030463, "calibration/coverage@1%": 0.2983730826460291, "calibration/coverage@10%": 0.6446673665017447, "calibration/coverage@15%": 0.7576914950817645, "calibration/coverage@20%": 0.8550639094863646, "calibration/coverage@25%": 0.9362886378997463, "calibration/coverage@30%": 0.9807291666666668, "calibration/coverage@5%": 0.4771018238588258, "calibration/ece": 0.16642530184501453, "calibration/mean_confidence": 0.5903065416765859, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00512152777777779, "completions/max_length": 4016.8, "completions/max_terminated_length": 4016.8, "completions/mean_length": 1555.1954833984375, "completions/mean_terminated_length": 1563.224755859375, "completions/min_length": 0.0, "completions/min_terminated_length": 467.0, "epoch": 2.1991975100311247, "grad_norm": 0.002537642838433385, "learning_rate": 7.512019230769231e-07, "loss": -0.009, "num_tokens": 2665345692.0, "reward": 1.0272498369216918, "reward_std": 0.10110396295785903, "rewards/accuracy_reward": 0.7412326574325562, "rewards/brier_reward": 0.8351900458335877, "rewards/confidence_uniqueness_reward": 0.9449792385101319, "rewards/format_reward": 0.9948784708976746, "rewards/frontier_coverage_0": 0.011141146440058947, "rewards/frontier_coverage_1": 0.011141146440058947, "rewards/frontier_coverage_10": 0.059621766209602356, "rewards/frontier_coverage_15": 0.12630040645599366, "rewards/frontier_coverage_20": 0.21101914644241332, "rewards/frontier_coverage_25": 0.30998865365982053, "rewards/frontier_coverage_5": 0.011431800480931998, "rewards/frontier_entropy_batch_reward": -0.29413830637931826, "signal/accuracy_reward/centered_abs_mean": 0.11163737028837203, "signal/accuracy_reward/group_std_mean": 0.14887254685163498, "signal/accuracy_reward/group_zero_std_frac": 0.569444453716278, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9359636783599854, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05581868514418602, "signal/advantage_abs_mean": 0.7702690005302429, "signal/advantage_pre_scale_abs_mean": 0.07702369540929795, "signal/advantage_pre_scale_std": 0.12798326462507248, "signal/advantage_std": 0.9828004717826844, "signal/brier_reward/centered_abs_mean": 0.10626696497201919, "signal/brier_reward/group_std_mean": 0.1377051830291748, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17910066843032837, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010626696608960628, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021018927916884422, "signal/confidence_uniqueness_reward/group_std_mean": 0.029378090426325797, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.035751673951745035, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021018928149715067, "signal/format_reward/centered_abs_mean": 0.007807074673473835, "signal/format_reward/group_std_mean": 0.013004416413605214, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06638518832623959, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0039035373367369173, "signal/frontier_coverage_0/centered_abs_mean": 0.14786854684352874, "signal/frontier_coverage_0/group_std_mean": 0.19159375727176667, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0357651524245739, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021145202219486235, "signal/frontier_coverage_1/centered_abs_mean": 0.14786854684352874, "signal/frontier_coverage_1/group_std_mean": 0.19159375727176667, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0357651524245739, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021145202219486235, "signal/frontier_coverage_10/centered_abs_mean": 0.062376074492931366, "signal/frontier_coverage_10/group_std_mean": 0.07864119410514832, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015165227092802524, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008919778745621442, "signal/frontier_coverage_15/centered_abs_mean": 0.08264107257127762, "signal/frontier_coverage_15/group_std_mean": 0.10196218788623809, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.019957508146762847, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011817673221230507, "signal/frontier_coverage_20/centered_abs_mean": 0.11547355949878693, "signal/frontier_coverage_20/group_std_mean": 0.14330800771713256, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027816576510667802, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016512719681486487, "signal/frontier_coverage_25/centered_abs_mean": 0.15543023347854615, "signal/frontier_coverage_25/group_std_mean": 0.1938639521598816, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.037404580414295195, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022226523142307997, "signal/frontier_coverage_5/centered_abs_mean": 0.1467347264289856, "signal/frontier_coverage_5/group_std_mean": 0.19018640220165253, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03549126200377941, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002098306594416499, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3218979060649872, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3870994865894318, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5446942985057831, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03218979090452194, "step": 915 }, { "calibration/aurc": 0.10810832702896182, "calibration/batch_distribution_entropy": 0.9625593780795281, "calibration/buffer_distribution_entropy": 0.9749842011690901, "calibration/confidence_entropy": 0.4886371192108515, "calibration/coverage@0%": 0.14716266762812186, "calibration/coverage@1%": 0.188431210385539, "calibration/coverage@10%": 0.6588592644298024, "calibration/coverage@15%": 0.7659079756924984, "calibration/coverage@20%": 0.8385027357046191, "calibration/coverage@25%": 0.8986056621054501, "calibration/coverage@30%": 0.9289297773160363, "calibration/coverage@5%": 0.3055369041028165, "calibration/ece": 0.15420951834596666, "calibration/mean_confidence": 0.5739624371588201, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3969.8, "completions/max_terminated_length": 3969.8, "completions/mean_length": 1547.8402099609375, "completions/mean_terminated_length": 1553.94873046875, "completions/min_length": 0.0, "completions/min_terminated_length": 391.4, "epoch": 2.2111973600329997, "grad_norm": 0.0025333850644528866, "learning_rate": 7.211538461538461e-07, "loss": -0.0053, "num_tokens": 2686252779.0, "reward": 1.0309239864349364, "reward_std": 0.10125423967838287, "rewards/accuracy_reward": 0.7498263835906982, "rewards/brier_reward": 0.8307314157485962, "rewards/confidence_uniqueness_reward": 0.9464572906494141, "rewards/format_reward": 0.99609375, "rewards/frontier_coverage_0": 0.004023569263517856, "rewards/frontier_coverage_1": 0.004023569263517856, "rewards/frontier_coverage_10": 0.0525899201631546, "rewards/frontier_coverage_15": 0.12458169758319855, "rewards/frontier_coverage_20": 0.2097533941268921, "rewards/frontier_coverage_25": 0.30895119309425356, "rewards/frontier_coverage_5": 0.004381868522614241, "rewards/frontier_entropy_batch_reward": -0.29883754849433897, "signal/accuracy_reward/centered_abs_mean": 0.1223415806889534, "signal/accuracy_reward/group_std_mean": 0.1568053334951401, "signal/accuracy_reward/group_zero_std_frac": 0.5722222447395324, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0544026374816895, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0611707903444767, "signal/advantage_abs_mean": 0.7869542241096497, "signal/advantage_pre_scale_abs_mean": 0.07895849943161011, "signal/advantage_pre_scale_std": 0.12843640744686127, "signal/advantage_std": 0.9827647566795349, "signal/brier_reward/centered_abs_mean": 0.1082388699054718, "signal/brier_reward/group_std_mean": 0.1389143019914627, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18672825992107392, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010823887214064598, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019709834456443788, "signal/confidence_uniqueness_reward/group_std_mean": 0.028117352351546287, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03400571942329407, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019709833664819597, "signal/format_reward/centered_abs_mean": 0.006776258768513799, "signal/format_reward/group_std_mean": 0.012154985405504703, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05845015123486519, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0033881293842568994, "signal/frontier_coverage_0/centered_abs_mean": 0.1616591066122055, "signal/frontier_coverage_0/group_std_mean": 0.20822520554065704, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0398515485227108, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002311725215986371, "signal/frontier_coverage_1/centered_abs_mean": 0.1616591066122055, "signal/frontier_coverage_1/group_std_mean": 0.20822520554065704, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0398515485227108, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002311725215986371, "signal/frontier_coverage_10/centered_abs_mean": 0.06490998640656472, "signal/frontier_coverage_10/group_std_mean": 0.08136135190725327, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016016687825322153, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009282128070481121, "signal/frontier_coverage_15/centered_abs_mean": 0.08258948773145676, "signal/frontier_coverage_15/group_std_mean": 0.1021083876490593, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.0203911405056715, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001181029691360891, "signal/frontier_coverage_20/centered_abs_mean": 0.1151643916964531, "signal/frontier_coverage_20/group_std_mean": 0.1425256460905075, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.028438878804445268, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016468508169054986, "signal/frontier_coverage_25/centered_abs_mean": 0.1551089197397232, "signal/frontier_coverage_25/group_std_mean": 0.19257171154022218, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03830631747841835, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022180575411766767, "signal/frontier_coverage_5/centered_abs_mean": 0.16062280237674714, "signal/frontier_coverage_5/group_std_mean": 0.206931734085083, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03959641382098198, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002296905964612961, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3264326810836792, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39315393567085266, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5637098908424377, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032643269002437594, "step": 920 }, { "calibration/aurc": 0.0581684721366415, "calibration/batch_distribution_entropy": 0.951823492836569, "calibration/buffer_distribution_entropy": 0.9756385764474949, "calibration/confidence_entropy": 0.4881648988825013, "calibration/coverage@0%": 0.1203125, "calibration/coverage@1%": 0.22239583333333335, "calibration/coverage@10%": 0.8208333333333332, "calibration/coverage@15%": 0.8916666666666666, "calibration/coverage@20%": 0.9229166666666668, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.6442708333333333, "calibration/ece": 0.22919932812500005, "calibration/mean_confidence": 0.6105329635416666, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002256944444444464, "completions/max_length": 3890.8, "completions/max_terminated_length": 3890.8, "completions/mean_length": 1462.75087890625, "completions/mean_terminated_length": 1466.093701171875, "completions/min_length": 80.4, "completions/min_terminated_length": 406.0, "epoch": 2.2231972100348747, "grad_norm": 0.002696170937269926, "learning_rate": 6.911057692307694e-07, "loss": -0.0031, "num_tokens": 2706173173.0, "reward": 1.0313870668411256, "reward_std": 0.09161647409200668, "rewards/accuracy_reward": 0.7465277910232544, "rewards/brier_reward": 0.8327569127082824, "rewards/confidence_uniqueness_reward": 0.948668384552002, "rewards/format_reward": 0.9977430462837219, "rewards/frontier_coverage_0": 0.005679074954241514, "rewards/frontier_coverage_1": 0.005679074954241514, "rewards/frontier_coverage_10": 0.05745949521660805, "rewards/frontier_coverage_15": 0.12821974307298661, "rewards/frontier_coverage_20": 0.21377132534980775, "rewards/frontier_coverage_25": 0.3111018896102905, "rewards/frontier_coverage_5": 0.00600547194480896, "rewards/frontier_entropy_batch_reward": -0.29300045371055605, "signal/accuracy_reward/centered_abs_mean": 0.10111762136220932, "signal/accuracy_reward/group_std_mean": 0.13440315127372743, "signal/accuracy_reward/group_zero_std_frac": 0.6166666626930237, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.913372540473938, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05055881068110466, "signal/advantage_abs_mean": 0.776658010482788, "signal/advantage_pre_scale_abs_mean": 0.07023707553744316, "signal/advantage_pre_scale_std": 0.11647895723581314, "signal/advantage_std": 0.9826768517494202, "signal/brier_reward/centered_abs_mean": 0.1053426593542099, "signal/brier_reward/group_std_mean": 0.13604272305965423, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19112329483032225, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010534266009926796, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016748364828526974, "signal/confidence_uniqueness_reward/group_std_mean": 0.022399993613362312, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.030245038866996764, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016748364781960846, "signal/format_reward/centered_abs_mean": 0.0037977430853061377, "signal/format_reward/group_std_mean": 0.0062358868308365345, "signal/format_reward/group_zero_std_frac": 0.9777777671813965, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.033247584290802476, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0018988715426530689, "signal/frontier_coverage_0/centered_abs_mean": 0.14969644248485564, "signal/frontier_coverage_0/group_std_mean": 0.19280532896518707, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03871278986334801, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002140659070573747, "signal/frontier_coverage_1/centered_abs_mean": 0.14969644248485564, "signal/frontier_coverage_1/group_std_mean": 0.19280532896518707, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03871278986334801, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002140659070573747, "signal/frontier_coverage_10/centered_abs_mean": 0.06155589893460274, "signal/frontier_coverage_10/group_std_mean": 0.07702510505914688, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01599162146449089, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008802493568509818, "signal/frontier_coverage_15/centered_abs_mean": 0.0844751238822937, "signal/frontier_coverage_15/group_std_mean": 0.1046410083770752, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021979451179504395, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012079942971467972, "signal/frontier_coverage_20/centered_abs_mean": 0.11722851842641831, "signal/frontier_coverage_20/group_std_mean": 0.14596150517463685, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030499268695712088, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001676367805339396, "signal/frontier_coverage_25/centered_abs_mean": 0.1558176100254059, "signal/frontier_coverage_25/group_std_mean": 0.19490368366241456, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04053145200014115, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002228191774338484, "signal/frontier_coverage_5/centered_abs_mean": 0.14871238470077514, "signal/frontier_coverage_5/group_std_mean": 0.19156993329524993, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03845802396535873, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0021265871357172726, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32605803608894346, "signal/frontier_entropy_batch_reward/group_std_mean": 0.391255658864975, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5923472046852112, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03260580375790596, "step": 925 }, { "calibration/aurc": 0.043213178725075574, "calibration/batch_distribution_entropy": 0.9438239170625191, "calibration/buffer_distribution_entropy": 0.9750450086297026, "calibration/confidence_entropy": 0.4616332302786533, "calibration/coverage@0%": 0.2288194444444444, "calibration/coverage@1%": 0.4826471560846561, "calibration/coverage@10%": 0.8594742063492063, "calibration/coverage@15%": 0.9177166005291004, "calibration/coverage@20%": 0.9734375, "calibration/coverage@25%": 0.9859375, "calibration/coverage@30%": 0.9989583333333332, "calibration/coverage@5%": 0.759077380952381, "calibration/ece": 0.22953177084160054, "calibration/mean_confidence": 0.6075171478091932, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003038194444444442, "completions/max_length": 3974.2, "completions/max_terminated_length": 3974.2, "completions/mean_length": 1501.35712890625, "completions/mean_terminated_length": 1505.9070068359374, "completions/min_length": 183.8, "completions/min_terminated_length": 446.6, "epoch": 2.2351970600367497, "grad_norm": 0.002601813990622759, "learning_rate": 6.610576923076924e-07, "loss": -0.0069, "num_tokens": 2726565767.0, "reward": 1.0443670511245728, "reward_std": 0.09180981665849686, "rewards/accuracy_reward": 0.7729166626930237, "rewards/brier_reward": 0.8460847616195679, "rewards/confidence_uniqueness_reward": 0.9460752367973327, "rewards/format_reward": 0.9969618082046509, "rewards/frontier_coverage_0": 0.006405340367928147, "rewards/frontier_coverage_1": 0.006405340367928147, "rewards/frontier_coverage_10": 0.0647600881755352, "rewards/frontier_coverage_15": 0.14153310060501098, "rewards/frontier_coverage_20": 0.234138023853302, "rewards/frontier_coverage_25": 0.33819299936294556, "rewards/frontier_coverage_5": 0.006916235387325287, "rewards/frontier_entropy_batch_reward": -0.31204586625099184, "signal/accuracy_reward/centered_abs_mean": 0.10067274197936057, "signal/accuracy_reward/group_std_mean": 0.1319912225008011, "signal/accuracy_reward/group_zero_std_frac": 0.6277777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8978427886962891, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05033637098968029, "signal/advantage_abs_mean": 0.7825265645980835, "signal/advantage_pre_scale_abs_mean": 0.07099922001361847, "signal/advantage_pre_scale_std": 0.11690742075443268, "signal/advantage_std": 0.9826888680458069, "signal/brier_reward/centered_abs_mean": 0.10140125602483749, "signal/brier_reward/group_std_mean": 0.1296430230140686, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18235966563224792, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010140126198530197, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018910757824778558, "signal/confidence_uniqueness_reward/group_std_mean": 0.02669762820005417, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034101661667227744, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018910758662968874, "signal/format_reward/centered_abs_mean": 0.005365668423473835, "signal/format_reward/group_std_mean": 0.009850092232227325, "signal/format_reward/group_zero_std_frac": 0.9611111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0482855424284935, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0026828342117369173, "signal/frontier_coverage_0/centered_abs_mean": 0.1465074121952057, "signal/frontier_coverage_0/group_std_mean": 0.18885864913463593, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03758770748972893, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020950559992343187, "signal/frontier_coverage_1/centered_abs_mean": 0.1465074121952057, "signal/frontier_coverage_1/group_std_mean": 0.18885864913463593, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03758770748972893, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020950559992343187, "signal/frontier_coverage_10/centered_abs_mean": 0.061785966902971265, "signal/frontier_coverage_10/group_std_mean": 0.07676958590745926, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015890642628073694, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008835393469780684, "signal/frontier_coverage_15/centered_abs_mean": 0.08617945164442062, "signal/frontier_coverage_15/group_std_mean": 0.10580342113971711, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02223038859665394, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012323661474511027, "signal/frontier_coverage_20/centered_abs_mean": 0.1186860054731369, "signal/frontier_coverage_20/group_std_mean": 0.1463834047317505, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030619293823838233, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001697209826670587, "signal/frontier_coverage_25/centered_abs_mean": 0.15584073662757875, "signal/frontier_coverage_25/group_std_mean": 0.1927361845970154, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.040186097472906114, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022285224869847296, "signal/frontier_coverage_5/centered_abs_mean": 0.14527685940265656, "signal/frontier_coverage_5/group_std_mean": 0.18733657896518707, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03727264627814293, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020774591015651823, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3365455687046051, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40074809789657595, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6073173880577087, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033654557168483736, "step": 930 }, { "calibration/aurc": 0.06769820110507842, "calibration/batch_distribution_entropy": 0.9640017323999144, "calibration/buffer_distribution_entropy": 0.9743043475159177, "calibration/confidence_entropy": 0.48128372910858425, "calibration/coverage@0%": 0.14768015951274746, "calibration/coverage@1%": 0.2824490651713151, "calibration/coverage@10%": 0.7675570611615644, "calibration/coverage@15%": 0.8934676184560824, "calibration/coverage@20%": 0.9355113762638668, "calibration/coverage@25%": 0.9682958325730248, "calibration/coverage@30%": 0.9947643979057592, "calibration/coverage@5%": 0.5556909172947986, "calibration/ece": 0.20573712648435638, "calibration/mean_confidence": 0.5876263315088851, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004513888888888884, "completions/max_length": 4076.6, "completions/max_terminated_length": 4076.6, "completions/mean_length": 1563.7186767578125, "completions/mean_terminated_length": 1570.95537109375, "completions/min_length": 100.6, "completions/min_terminated_length": 421.8, "epoch": 2.2471969100386247, "grad_norm": 0.002489317674189806, "learning_rate": 6.310096153846154e-07, "loss": -0.0074, "num_tokens": 2747662654.0, "reward": 1.0286352634429932, "reward_std": 0.0958052396774292, "rewards/accuracy_reward": 0.7419270753860474, "rewards/brier_reward": 0.8244329452514648, "rewards/confidence_uniqueness_reward": 0.9473481178283691, "rewards/format_reward": 0.9954861164093017, "rewards/frontier_coverage_0": 0.0055387676926329735, "rewards/frontier_coverage_1": 0.0055387676926329735, "rewards/frontier_coverage_10": 0.061582712829113005, "rewards/frontier_coverage_15": 0.13054971098899842, "rewards/frontier_coverage_20": 0.2142077714204788, "rewards/frontier_coverage_25": 0.30671278238296507, "rewards/frontier_coverage_5": 0.006274393014609814, "rewards/frontier_entropy_batch_reward": -0.27694271206855775, "signal/accuracy_reward/centered_abs_mean": 0.09981011301279068, "signal/accuracy_reward/group_std_mean": 0.1391371890902519, "signal/accuracy_reward/group_zero_std_frac": 0.5805555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8497473239898682, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04990505650639534, "signal/advantage_abs_mean": 0.7577031970024108, "signal/advantage_pre_scale_abs_mean": 0.07107750698924065, "signal/advantage_pre_scale_std": 0.12123489528894424, "signal/advantage_std": 0.9827822804450989, "signal/brier_reward/centered_abs_mean": 0.10653214752674103, "signal/brier_reward/group_std_mean": 0.13699764758348465, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18180096745491028, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010653214715421199, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020202530920505522, "signal/confidence_uniqueness_reward/group_std_mean": 0.028109391033649445, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03422162234783173, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020202531712129714, "signal/format_reward/centered_abs_mean": 0.007389322947710752, "signal/format_reward/group_std_mean": 0.01228577308356762, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06149484626948833, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003694661473855376, "signal/frontier_coverage_0/centered_abs_mean": 0.15176095068454742, "signal/frontier_coverage_0/group_std_mean": 0.19587263464927673, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03702950105071068, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021701816003769636, "signal/frontier_coverage_1/centered_abs_mean": 0.15176095068454742, "signal/frontier_coverage_1/group_std_mean": 0.19587263464927673, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03702950105071068, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021701816003769636, "signal/frontier_coverage_10/centered_abs_mean": 0.06348835378885269, "signal/frontier_coverage_10/group_std_mean": 0.07822826206684112, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015514366328716278, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009078834671527147, "signal/frontier_coverage_15/centered_abs_mean": 0.08382693082094192, "signal/frontier_coverage_15/group_std_mean": 0.10365720987319946, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.020487995445728303, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011987250996753573, "signal/frontier_coverage_20/centered_abs_mean": 0.11406213641166688, "signal/frontier_coverage_20/group_std_mean": 0.14230601191520692, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027876751869916915, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016310885082930326, "signal/frontier_coverage_25/centered_abs_mean": 0.14945789575576782, "signal/frontier_coverage_25/group_std_mean": 0.18777556121349334, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03652668297290802, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00213724784553051, "signal/frontier_coverage_5/centered_abs_mean": 0.15007020831108092, "signal/frontier_coverage_5/group_std_mean": 0.19369837045669555, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0366180919110775, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002146004047244787, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.328479528427124, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3947932004928589, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5613398909568786, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03284795358777046, "step": 935 }, { "calibration/aurc": 0.09320443429342211, "calibration/batch_distribution_entropy": 0.9289924908833758, "calibration/buffer_distribution_entropy": 0.974310713124229, "calibration/confidence_entropy": 0.48827981140169613, "calibration/coverage@0%": 0.2713541666666667, "calibration/coverage@1%": 0.34531249999999997, "calibration/coverage@10%": 0.6015625, "calibration/coverage@15%": 0.7182291666666667, "calibration/coverage@20%": 0.8119791666666666, "calibration/coverage@25%": 0.9072916666666666, "calibration/coverage@30%": 0.9635416666666667, "calibration/coverage@5%": 0.5192708333333333, "calibration/ece": 0.15531255208333333, "calibration/mean_confidence": 0.6371385937499999, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021701388888888838, "completions/max_length": 3889.4, "completions/max_terminated_length": 3889.4, "completions/mean_length": 1514.1761474609375, "completions/mean_terminated_length": 1517.495166015625, "completions/min_length": 0.0, "completions/min_terminated_length": 384.8, "epoch": 2.2591967600404996, "grad_norm": 0.002634695265442133, "learning_rate": 6.009615384615385e-07, "loss": -0.0003, "num_tokens": 2768192267.0, "reward": 1.0425429582595824, "reward_std": 0.09554083198308945, "rewards/accuracy_reward": 0.7733506917953491, "rewards/brier_reward": 0.8325282812118531, "rewards/confidence_uniqueness_reward": 0.9469174981117249, "rewards/format_reward": 0.9979166626930237, "rewards/frontier_coverage_0": -0.013554162811487913, "rewards/frontier_coverage_1": -0.013554162811487913, "rewards/frontier_coverage_10": 0.06425249055027962, "rewards/frontier_coverage_15": 0.13952557295560836, "rewards/frontier_coverage_20": 0.22933673560619355, "rewards/frontier_coverage_25": 0.3282873511314392, "rewards/frontier_coverage_5": -0.012855465337634087, "rewards/frontier_entropy_batch_reward": -0.313518762588501, "signal/accuracy_reward/centered_abs_mean": 0.10990125685930252, "signal/accuracy_reward/group_std_mean": 0.1430188611149788, "signal/accuracy_reward/group_zero_std_frac": 0.5972222208976745, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9571273446083068, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05495062842965126, "signal/advantage_abs_mean": 0.7779755830764771, "signal/advantage_pre_scale_abs_mean": 0.07414216324687004, "signal/advantage_pre_scale_std": 0.12053094506263733, "signal/advantage_std": 0.9827156066894531, "signal/brier_reward/centered_abs_mean": 0.10234658420085907, "signal/brier_reward/group_std_mean": 0.13204047679901124, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18013457655906678, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010234658606350422, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01755792982876301, "signal/confidence_uniqueness_reward/group_std_mean": 0.02490374743938446, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.031031015142798424, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017557929968461394, "signal/format_reward/centered_abs_mean": 0.0038302951259538533, "signal/format_reward/group_std_mean": 0.007910448359325528, "signal/format_reward/group_zero_std_frac": 0.9666666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0327956123277545, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0019151475629769267, "signal/frontier_coverage_0/centered_abs_mean": 0.14841342717409134, "signal/frontier_coverage_0/group_std_mean": 0.19171408116817473, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03722411021590233, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021223119460046292, "signal/frontier_coverage_1/centered_abs_mean": 0.14841342717409134, "signal/frontier_coverage_1/group_std_mean": 0.19171408116817473, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03722411021590233, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021223119460046292, "signal/frontier_coverage_10/centered_abs_mean": 0.06261468380689621, "signal/frontier_coverage_10/group_std_mean": 0.07751820534467697, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015907155349850655, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008953900076448917, "signal/frontier_coverage_15/centered_abs_mean": 0.0870408520102501, "signal/frontier_coverage_15/group_std_mean": 0.10760061740875244, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.022193774580955505, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012446841225028037, "signal/frontier_coverage_20/centered_abs_mean": 0.12070697844028473, "signal/frontier_coverage_20/group_std_mean": 0.1497057557106018, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.030737898126244546, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001726109767332673, "signal/frontier_coverage_25/centered_abs_mean": 0.15899961888790132, "signal/frontier_coverage_25/group_std_mean": 0.1976221203804016, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0404223270714283, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022736945655196907, "signal/frontier_coverage_5/centered_abs_mean": 0.14664799571037293, "signal/frontier_coverage_5/group_std_mean": 0.18951908648014068, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03677628450095653, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0020970664452761413, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33282300233840945, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3974955141544342, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5917717456817627, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03328230008482933, "step": 940 }, { "calibration/aurc": 0.08682412860157046, "calibration/batch_distribution_entropy": 0.952244168172321, "calibration/buffer_distribution_entropy": 0.9739697354595019, "calibration/confidence_entropy": 0.48189116386131464, "calibration/coverage@0%": 0.057291666666666664, "calibration/coverage@1%": 0.109375, "calibration/coverage@10%": 0.7216043307086615, "calibration/coverage@15%": 0.7852403215223097, "calibration/coverage@20%": 0.8796382874015748, "calibration/coverage@25%": 0.9448818897637796, "calibration/coverage@30%": 0.9616797900262467, "calibration/coverage@5%": 0.5338459645669291, "calibration/ece": 0.1673374657562336, "calibration/mean_confidence": 0.5954183577345801, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.001388888888888906, "completions/max_length": 3953.4, "completions/max_terminated_length": 3953.4, "completions/mean_length": 1553.2009521484374, "completions/mean_terminated_length": 1555.3579345703124, "completions/min_length": 203.0, "completions/min_terminated_length": 542.6, "epoch": 2.2711966100423746, "grad_norm": 0.0025092982687056065, "learning_rate": 5.709134615384615e-07, "loss": 0.0032, "num_tokens": 2789206870.0, "reward": 1.0252916812896729, "reward_std": 0.10383205115795135, "rewards/accuracy_reward": 0.7349826335906983, "rewards/brier_reward": 0.8226475954055786, "rewards/confidence_uniqueness_reward": 0.9491451501846313, "rewards/format_reward": 0.9986111164093018, "rewards/frontier_coverage_0": 0.002067159628495574, "rewards/frontier_coverage_1": 0.002067159628495574, "rewards/frontier_coverage_10": 0.060900063067674634, "rewards/frontier_coverage_15": 0.1282704308629036, "rewards/frontier_coverage_20": 0.20991497933864595, "rewards/frontier_coverage_25": 0.29933114647865294, "rewards/frontier_coverage_5": 0.0027842882089316847, "rewards/frontier_entropy_batch_reward": -0.2877081334590912, "signal/accuracy_reward/centered_abs_mean": 0.12976887822151184, "signal/accuracy_reward/group_std_mean": 0.16400441527366638, "signal/accuracy_reward/group_zero_std_frac": 0.5527777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0821083426475524, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06488443911075592, "signal/advantage_abs_mean": 0.7864845633506775, "signal/advantage_pre_scale_abs_mean": 0.08278766125440598, "signal/advantage_pre_scale_std": 0.12968444377183913, "signal/advantage_std": 0.9828070282936097, "signal/brier_reward/centered_abs_mean": 0.11329959332942963, "signal/brier_reward/group_std_mean": 0.14356452822685242, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.189946448802948, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011329959891736508, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.016427206248044966, "signal/confidence_uniqueness_reward/group_std_mean": 0.021708906069397925, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02749452255666256, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0016427206806838512, "signal/format_reward/centered_abs_mean": 0.002452256949618459, "signal/format_reward/group_std_mean": 0.004412041790783405, "signal/format_reward/group_zero_std_frac": 0.9833333253860473, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.019744722917675973, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0012261284748092294, "signal/frontier_coverage_0/centered_abs_mean": 0.15941068530082703, "signal/frontier_coverage_0/group_std_mean": 0.20225572884082793, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.038180924206972125, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002279572864063084, "signal/frontier_coverage_1/centered_abs_mean": 0.15941068530082703, "signal/frontier_coverage_1/group_std_mean": 0.20225572884082793, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.038180924206972125, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002279572864063084, "signal/frontier_coverage_10/centered_abs_mean": 0.06408170610666275, "signal/frontier_coverage_10/group_std_mean": 0.07931052595376968, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015410272032022476, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009163683629594743, "signal/frontier_coverage_15/centered_abs_mean": 0.09135069847106933, "signal/frontier_coverage_15/group_std_mean": 0.11312156021595002, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021967886388301848, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013063149759545921, "signal/frontier_coverage_20/centered_abs_mean": 0.12903558164834977, "signal/frontier_coverage_20/group_std_mean": 0.15990031361579896, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03101343587040901, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018452089047059418, "signal/frontier_coverage_25/centered_abs_mean": 0.17198951840400695, "signal/frontier_coverage_25/group_std_mean": 0.21296056509017944, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0413208082318306, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00245945006608963, "signal/frontier_coverage_5/centered_abs_mean": 0.15726107358932495, "signal/frontier_coverage_5/group_std_mean": 0.19959587454795838, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03766716942191124, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0022488333052024245, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3204162836074829, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38639105558395387, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5392665505409241, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032041627913713455, "step": 945 }, { "calibration/aurc": 0.12652919135947716, "calibration/batch_distribution_entropy": 0.9568221850468424, "calibration/buffer_distribution_entropy": 0.9745851992890928, "calibration/confidence_entropy": 0.4825643832794569, "calibration/coverage@0%": 0.05265019458052019, "calibration/coverage@1%": 0.09431686124718687, "calibration/coverage@10%": 0.39464082582382354, "calibration/coverage@15%": 0.5999637537489063, "calibration/coverage@20%": 0.8665215208277772, "calibration/coverage@25%": 0.9445594362487075, "calibration/coverage@30%": 0.9738683464223385, "calibration/coverage@5%": 0.2785393316419391, "calibration/ece": 0.18753364804086017, "calibration/mean_confidence": 0.560442754265039, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003125, "completions/max_length": 3963.4, "completions/max_terminated_length": 3963.4, "completions/mean_length": 1648.2306396484375, "completions/mean_terminated_length": 1653.50927734375, "completions/min_length": 81.2, "completions/min_terminated_length": 439.8, "epoch": 2.2831964600442496, "grad_norm": 0.002521132817491889, "learning_rate": 5.408653846153847e-07, "loss": -0.0058, "num_tokens": 2811319799.0, "reward": 1.0242114305496215, "reward_std": 0.10054776221513748, "rewards/accuracy_reward": 0.7371527791023255, "rewards/brier_reward": 0.8280610561370849, "rewards/confidence_uniqueness_reward": 0.9465069651603699, "rewards/format_reward": 0.996875, "rewards/frontier_coverage_0": 0.01441353103145957, "rewards/frontier_coverage_1": 0.01441353103145957, "rewards/frontier_coverage_10": 0.06537414789199829, "rewards/frontier_coverage_15": 0.13175831288099288, "rewards/frontier_coverage_20": 0.21454677879810333, "rewards/frontier_coverage_25": 0.30874282121658325, "rewards/frontier_coverage_5": 0.015491097513586283, "rewards/frontier_entropy_batch_reward": -0.31195072531700135, "signal/accuracy_reward/centered_abs_mean": 0.11291232705116272, "signal/accuracy_reward/group_std_mean": 0.15053357183933258, "signal/accuracy_reward/group_zero_std_frac": 0.5638889014720917, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9593138813972473, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05645616352558136, "signal/advantage_abs_mean": 0.7690176606178284, "signal/advantage_pre_scale_abs_mean": 0.0772487387061119, "signal/advantage_pre_scale_std": 0.1267082616686821, "signal/advantage_std": 0.9827841877937317, "signal/brier_reward/centered_abs_mean": 0.10452846437692642, "signal/brier_reward/group_std_mean": 0.13697410225868226, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17854879796504974, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010452846810221673, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018856966122984885, "signal/confidence_uniqueness_reward/group_std_mean": 0.026795653998851775, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032309388369321825, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018856966169551014, "signal/format_reward/centered_abs_mean": 0.005110677098855376, "signal/format_reward/group_std_mean": 0.009960832260549068, "signal/format_reward/group_zero_std_frac": 0.9555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04381188787519932, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002555338549427688, "signal/frontier_coverage_0/centered_abs_mean": 0.14766640961170197, "signal/frontier_coverage_0/group_std_mean": 0.19499149024486542, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036027568578720096, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0021116294898092746, "signal/frontier_coverage_1/centered_abs_mean": 0.14766640961170197, "signal/frontier_coverage_1/group_std_mean": 0.19499149024486542, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.036027568578720096, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021116294898092746, "signal/frontier_coverage_10/centered_abs_mean": 0.06306469812989235, "signal/frontier_coverage_10/group_std_mean": 0.07838996946811676, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015405329130589961, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009018251672387123, "signal/frontier_coverage_15/centered_abs_mean": 0.0873841717839241, "signal/frontier_coverage_15/group_std_mean": 0.10764139890670776, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.021306929364800452, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0012495935894548892, "signal/frontier_coverage_20/centered_abs_mean": 0.12113028168678283, "signal/frontier_coverage_20/group_std_mean": 0.15000077486038207, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.02950175330042839, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0017321630381047725, "signal/frontier_coverage_25/centered_abs_mean": 0.16070158481597902, "signal/frontier_coverage_25/group_std_mean": 0.19954589903354644, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03912241980433464, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0022980326786637304, "signal/frontier_coverage_5/centered_abs_mean": 0.1454429507255554, "signal/frontier_coverage_5/group_std_mean": 0.1921371579170227, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03548334017395973, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002079833997413516, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33761860728263854, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4023285984992981, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5767180442810058, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03376186117529869, "step": 950 }, { "epoch": 2.2831964600442496, "eval_calibration/aurc": 0.13675649961578237, "eval_calibration/batch_distribution_entropy": 0.9091355165127851, "eval_calibration/buffer_distribution_entropy": 0.97407149477294, "eval_calibration/confidence_entropy": 0.4783477271683041, "eval_calibration/coverage@0%": 0.23454301075268816, "eval_calibration/coverage@1%": 0.23454301075268816, "eval_calibration/coverage@10%": 0.4637096774193548, "eval_calibration/coverage@15%": 0.6001344086021505, "eval_calibration/coverage@20%": 0.8099798387096774, "eval_calibration/coverage@25%": 0.862231182795699, "eval_calibration/coverage@30%": 0.984375, "eval_calibration/coverage@5%": 0.3387096774193548, "eval_calibration/ece": 0.21853504704301074, "eval_calibration/mean_confidence": 0.5743662970430107, "eval_completions/clipped_ratio": 0.00347222222222221, "eval_completions/max_length": 3536.0, "eval_completions/max_terminated_length": 3536.0, "eval_completions/mean_length": 1604.8047281901042, "eval_completions/mean_terminated_length": 1610.3856201171875, "eval_completions/min_length": 421.5, "eval_completions/min_terminated_length": 636.0, "eval_loss": 0.0, "eval_num_tokens": 2811319799.0, "eval_reward": 0.9298228621482849, "eval_reward_std": 0.23609469334284464, "eval_rewards/accuracy_reward": 0.6953125, "eval_rewards/brier_reward": 0.8285978237787882, "eval_rewards/confidence_uniqueness_reward": 0.8950509230295817, "eval_rewards/format_reward": 0.995659718910853, "eval_rewards/frontier_coverage_0": 0.04206457252924641, "eval_rewards/frontier_coverage_1": 0.04206457252924641, "eval_rewards/frontier_coverage_10": 0.0667329914867878, "eval_rewards/frontier_coverage_15": 0.12518454591433206, "eval_rewards/frontier_coverage_20": 0.20115434378385544, "eval_rewards/frontier_coverage_25": 0.2872895747423172, "eval_rewards/frontier_coverage_5": 0.04235087055712938, "eval_rewards/frontier_entropy_batch_reward": -0.995659718910853, "eval_runtime": 206.1535, "eval_samples_per_second": 4.851, "eval_signal/accuracy_reward/centered_abs_mean": 0.4056532184282939, "eval_signal/accuracy_reward/group_std_mean": 0.4547837922970454, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8753860890865326, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20282660921414694, "eval_signal/advantage_abs_mean": 0.8655439913272858, "eval_signal/advantage_pre_scale_abs_mean": 0.20619056125481924, "eval_signal/advantage_pre_scale_std": 0.2349847455819448, "eval_signal/advantage_std": 0.9863970478375753, "eval_signal/brier_reward/centered_abs_mean": 0.1640371655424436, "eval_signal/brier_reward/group_std_mean": 0.22055867314338684, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07109572117527325, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.016403717764963705, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045909797151883446, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06451402107874553, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01983573194593191, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004590979932496945, "eval_signal/format_reward/centered_abs_mean": 0.008300781094779571, "eval_signal/format_reward/group_std_mean": 0.021562910017867882, "eval_signal/format_reward/group_zero_std_frac": 0.8888889054457346, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.017497866414487362, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.004150390547389786, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2689051379760106, "eval_signal/frontier_coverage_0/group_std_mean": 0.3698471784591675, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.016674190914879244, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0038453434826806188, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2689051379760106, "eval_signal/frontier_coverage_1/group_std_mean": 0.3698471784591675, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.016674190914879244, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0038453434826806188, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.08195547511180241, "eval_signal/frontier_coverage_10/group_std_mean": 0.10345051437616348, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005070453975349665, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00117196326997752, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.14475023746490479, "eval_signal/frontier_coverage_15/group_std_mean": 0.18270048995812735, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.008945515534530083, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002069928372899691, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.23411591102679571, "eval_signal/frontier_coverage_20/group_std_mean": 0.2887367457151413, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.014464873975763718, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003347857428404192, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.3328509529431661, "eval_signal/frontier_coverage_25/group_std_mean": 0.40653078258037567, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.02055966140081485, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004759768722578883, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.2655777111649513, "eval_signal/frontier_coverage_5/group_std_mean": 0.36574364205201465, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.016468066566934187, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0037977612810209394, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008300781094779571, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.021562910017867882, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8888889054457346, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0034995736399044595, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008300781288805107, "eval_steps_per_second": 0.029, "step": 950 }, { "epoch": 2.2831964600442496, "step": 950, "train_probe_calibration/aurc": 0.07581074120148544, "train_probe_calibration/batch_distribution_entropy": 0.899964335250737, "train_probe_calibration/buffer_distribution_entropy": 0.9737900708282261, "train_probe_calibration/confidence_entropy": 0.4739935910661874, "train_probe_calibration/coverage@0%": 0.4527889784946237, "train_probe_calibration/coverage@1%": 0.4527889784946237, "train_probe_calibration/coverage@10%": 0.7192540322580645, "train_probe_calibration/coverage@15%": 0.8946572580645161, "train_probe_calibration/coverage@20%": 0.962869623655914, "train_probe_calibration/coverage@25%": 0.989247311827957, "train_probe_calibration/coverage@30%": 1.0, "train_probe_calibration/coverage@5%": 0.5885416666666666, "train_probe_calibration/ece": 0.213778746639785, "train_probe_calibration/mean_confidence": 0.6232134240591398, "train_probe_completions/clipped_ratio": 0.007638888888888917, "train_probe_completions/max_length": 3633.5, "train_probe_completions/max_terminated_length": 3633.5, "train_probe_completions/mean_length": 1592.7953491210938, "train_probe_completions/mean_terminated_length": 1604.8572794596355, "train_probe_completions/min_length": 89.0, "train_probe_completions/min_terminated_length": 551.3333333333334, "train_probe_loss": 0.0, "train_probe_num_tokens": 2811319799.0, "train_probe_reward": 0.9637367725372314, "train_probe_reward_std": 0.2208165650566419, "train_probe_rewards/accuracy_reward": 0.761284718910853, "train_probe_rewards/brier_reward": 0.8417325516541799, "train_probe_rewards/confidence_uniqueness_reward": 0.8906635443369547, "train_probe_rewards/format_reward": 0.9947916766007742, "train_probe_rewards/frontier_coverage_0": 0.005254412613188227, "train_probe_rewards/frontier_coverage_1": 0.005254412613188227, "train_probe_rewards/frontier_coverage_10": 0.07371875147024791, "train_probe_rewards/frontier_coverage_15": 0.15096323440472284, "train_probe_rewards/frontier_coverage_20": 0.24458193282286325, "train_probe_rewards/frontier_coverage_25": 0.3490742842356364, "train_probe_rewards/frontier_coverage_5": 0.005987585289403796, "train_probe_rewards/frontier_entropy_batch_reward": -0.9947916766007742, "train_probe_runtime": 217.635, "train_probe_samples_per_second": 4.595, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3530273387829463, "train_probe_signal/accuracy_reward/group_std_mean": 0.4242842694123586, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8170821766058604, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17651366939147314, "train_probe_signal/advantage_abs_mean": 0.8060585856437683, "train_probe_signal/advantage_pre_scale_abs_mean": 0.18002791702747345, "train_probe_signal/advantage_pre_scale_std": 0.22036000341176987, "train_probe_signal/advantage_std": 0.986365924278895, "train_probe_signal/brier_reward/centered_abs_mean": 0.15322893857955933, "train_probe_signal/brier_reward/group_std_mean": 0.2087702974677086, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07108517860372861, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.015322894168396791, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04701675598820051, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.06953836977481842, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.021755116681257885, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0047016756143420935, "train_probe_signal/format_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/format_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/format_reward/group_zero_std_frac": 0.8333333631356558, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.022791087937851746, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.0050455727614462376, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.23706353455781937, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.34765902161598206, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.01574373881643017, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.003390008544859787, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.23706353455781937, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.34765902161598206, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01574373881643017, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003390008544859787, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.08195397506157558, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.10161611934502919, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0054444929119199514, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.00117194183015575, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.1490315372745196, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.18034624059995016, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.00989202270284295, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0021311509578178325, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.2324892282485962, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.2789619415998459, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.015423213442166647, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003324596017288665, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.321807121237119, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.38699104885260266, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021341119272013504, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0046018418700744705, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.23428418238957724, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3439280440409978, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.015559157667060694, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0033502636554961405, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.010091145522892475, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.02946278266608715, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333631356558, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004558217866967122, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0010091146104969084, "train_probe_steps_per_second": 0.028 }, { "calibration/aurc": 0.14273125825617144, "calibration/batch_distribution_entropy": 0.9550349669352135, "calibration/buffer_distribution_entropy": 0.9742140739784955, "calibration/confidence_entropy": 0.494557200447038, "calibration/coverage@0%": 0.13072916666666667, "calibration/coverage@1%": 0.2260416666666667, "calibration/coverage@10%": 0.4385416666666666, "calibration/coverage@15%": 0.5364583333333334, "calibration/coverage@20%": 0.63125, "calibration/coverage@25%": 0.8817708333333334, "calibration/coverage@30%": 0.9203125, "calibration/coverage@5%": 0.33489583333333334, "calibration/ece": 0.19657210416666665, "calibration/mean_confidence": 0.5674274791666667, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0032986111111110938, "completions/max_length": 3983.8, "completions/max_terminated_length": 3983.8, "completions/mean_length": 1634.6527099609375, "completions/mean_terminated_length": 1640.19443359375, "completions/min_length": 180.2, "completions/min_terminated_length": 544.4, "epoch": 2.2951963100461246, "grad_norm": 0.0024574222043156624, "learning_rate": 5.108173076923077e-07, "loss": -0.0033, "num_tokens": 2833227638.0, "reward": 1.0299370527267455, "reward_std": 0.093611079454422, "rewards/accuracy_reward": 0.7506944417953492, "rewards/brier_reward": 0.8379656314849854, "rewards/confidence_uniqueness_reward": 0.9445121049880981, "rewards/format_reward": 0.9967013835906983, "rewards/frontier_coverage_0": 0.011254264181479812, "rewards/frontier_coverage_1": 0.011254264181479812, "rewards/frontier_coverage_10": 0.06909877061843872, "rewards/frontier_coverage_15": 0.14030956625938415, "rewards/frontier_coverage_20": 0.22847531437873841, "rewards/frontier_coverage_25": 0.32808240652084353, "rewards/frontier_coverage_5": 0.011783378524705767, "rewards/frontier_entropy_batch_reward": -0.3345234453678131, "signal/accuracy_reward/centered_abs_mean": 0.10070529580116272, "signal/accuracy_reward/group_std_mean": 0.139546899497509, "signal/accuracy_reward/group_zero_std_frac": 0.5805555820465088, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8801249861717224, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05035264790058136, "signal/advantage_abs_mean": 0.7685187101364136, "signal/advantage_pre_scale_abs_mean": 0.07071012929081917, "signal/advantage_pre_scale_std": 0.11747289299964905, "signal/advantage_std": 0.9827180862426758, "signal/brier_reward/centered_abs_mean": 0.1026095524430275, "signal/brier_reward/group_std_mean": 0.13352414667606355, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18136341571807862, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010260955616831779, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019099758937954903, "signal/confidence_uniqueness_reward/group_std_mean": 0.026142006739974022, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.033878518640995024, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019099759869277478, "signal/format_reward/centered_abs_mean": 0.004763454850763083, "signal/format_reward/group_std_mean": 0.00826217420399189, "signal/format_reward/group_zero_std_frac": 0.9666666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.042343306541442874, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0023817274253815413, "signal/frontier_coverage_0/centered_abs_mean": 0.14085240364074708, "signal/frontier_coverage_0/group_std_mean": 0.18698779344558716, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03563583679497242, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020141893532127143, "signal/frontier_coverage_1/centered_abs_mean": 0.14085240364074708, "signal/frontier_coverage_1/group_std_mean": 0.18698779344558716, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03563583679497242, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020141893532127143, "signal/frontier_coverage_10/centered_abs_mean": 0.06276597455143929, "signal/frontier_coverage_10/group_std_mean": 0.07800111174583435, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01591112706810236, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0008975534467026592, "signal/frontier_coverage_15/centered_abs_mean": 0.09101367890834808, "signal/frontier_coverage_15/group_std_mean": 0.11218550503253936, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.023008933290839195, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013014955911785364, "signal/frontier_coverage_20/centered_abs_mean": 0.12615538388490677, "signal/frontier_coverage_20/group_std_mean": 0.15606312751770018, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03185085244476795, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018040220718830823, "signal/frontier_coverage_25/centered_abs_mean": 0.1652356654405594, "signal/frontier_coverage_25/group_std_mean": 0.205272775888443, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04169749319553375, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002362869819626212, "signal/frontier_coverage_5/centered_abs_mean": 0.13930575549602509, "signal/frontier_coverage_5/group_std_mean": 0.18501150012016296, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0352470863610506, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0019920722115784883, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33024551868438723, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39427871704101564, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5849268555641174, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03302455134689808, "step": 955 }, { "calibration/aurc": 0.1296094462033201, "calibration/batch_distribution_entropy": 0.9691447113783008, "calibration/buffer_distribution_entropy": 0.9751247270327952, "calibration/confidence_entropy": 0.4844039802407488, "calibration/coverage@0%": 0.190625, "calibration/coverage@1%": 0.3703125, "calibration/coverage@10%": 0.5713541666666666, "calibration/coverage@15%": 0.6640625, "calibration/coverage@20%": 0.7374999999999999, "calibration/coverage@25%": 0.7828125000000001, "calibration/coverage@30%": 0.8005208333333332, "calibration/coverage@5%": 0.484375, "calibration/ece": 0.1975291145833334, "calibration/mean_confidence": 0.5528219270833333, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002517361111111138, "completions/max_length": 4036.2, "completions/max_terminated_length": 4036.2, "completions/mean_length": 1708.0447265625, "completions/mean_terminated_length": 1712.33408203125, "completions/min_length": 0.0, "completions/min_terminated_length": 578.2, "epoch": 2.3071961600479995, "grad_norm": 0.0025664744898676872, "learning_rate": 4.807692307692308e-07, "loss": -0.0059, "num_tokens": 2855985721.0, "reward": 1.0242048621177673, "reward_std": 0.1011396512389183, "rewards/accuracy_reward": 0.7341145873069763, "rewards/brier_reward": 0.8207546234130859, "rewards/confidence_uniqueness_reward": 0.9482064723968506, "rewards/format_reward": 0.997569453716278, "rewards/frontier_coverage_0": 0.005981969460844993, "rewards/frontier_coverage_1": 0.005981969460844993, "rewards/frontier_coverage_10": 0.06367864459753036, "rewards/frontier_coverage_15": 0.12802914083003997, "rewards/frontier_coverage_20": 0.2087888687849045, "rewards/frontier_coverage_25": 0.3009022116661072, "rewards/frontier_coverage_5": 0.0064484432339668276, "rewards/frontier_entropy_batch_reward": -0.28826587498188017, "signal/accuracy_reward/centered_abs_mean": 0.11920030564069747, "signal/accuracy_reward/group_std_mean": 0.15845068097114562, "signal/accuracy_reward/group_zero_std_frac": 0.5416666805744171, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9978924989700317, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05960015282034874, "signal/advantage_abs_mean": 0.7690481185913086, "signal/advantage_pre_scale_abs_mean": 0.07742958068847657, "signal/advantage_pre_scale_std": 0.12594334036111832, "signal/advantage_std": 0.9828057885169983, "signal/brier_reward/centered_abs_mean": 0.11113527119159698, "signal/brier_reward/group_std_mean": 0.14212769269943237, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1862643241882324, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011113526858389377, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01761804409325123, "signal/confidence_uniqueness_reward/group_std_mean": 0.025302357226610183, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029750457778573037, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017618043581023811, "signal/format_reward/centered_abs_mean": 0.004372829792555421, "signal/format_reward/group_std_mean": 0.008926727809011936, "signal/format_reward/group_zero_std_frac": 0.9611111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.036962130852043626, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0021864148962777107, "signal/frontier_coverage_0/centered_abs_mean": 0.16451604068279266, "signal/frontier_coverage_0/group_std_mean": 0.21193538308143617, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.039420148730278014, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0023525793571025134, "signal/frontier_coverage_1/centered_abs_mean": 0.16451604068279266, "signal/frontier_coverage_1/group_std_mean": 0.21193538308143617, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.039420148730278014, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0023525793571025134, "signal/frontier_coverage_10/centered_abs_mean": 0.06357394829392433, "signal/frontier_coverage_10/group_std_mean": 0.07909071594476699, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.015305314771831035, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009091074694879353, "signal/frontier_coverage_15/centered_abs_mean": 0.08338246792554856, "signal/frontier_coverage_15/group_std_mean": 0.10340235531330108, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02012072168290615, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0011923692654818296, "signal/frontier_coverage_20/centered_abs_mean": 0.11507630050182342, "signal/frontier_coverage_20/group_std_mean": 0.14356274902820587, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.027765774726867677, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0016455910867080093, "signal/frontier_coverage_25/centered_abs_mean": 0.1533314347267151, "signal/frontier_coverage_25/group_std_mean": 0.1919599086046219, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03697655647993088, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00219263955950737, "signal/frontier_coverage_5/centered_abs_mean": 0.16278515756130219, "signal/frontier_coverage_5/group_std_mean": 0.20979312360286712, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.0390064924955368, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0023278276901692154, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31930898427963256, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38691142201423645, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5392434418201446, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03193089962005615, "step": 960 }, { "calibration/aurc": 0.14038244862449353, "calibration/batch_distribution_entropy": 0.9546107782096881, "calibration/batch_entropy_100bins": 0.9485610508453283, "calibration/batch_entropy_10bins": 0.9546107782096881, "calibration/batch_entropy_50bins": 0.9569943377115798, "calibration/batch_uniqueness": 0.9503503874846292, "calibration/confidence_entropy": 0.488693646883382, "calibration/coverage@0%": 0.0385498687664042, "calibration/coverage@1%": 0.0385498687664042, "calibration/coverage@10%": 0.2287483595800525, "calibration/coverage@15%": 0.6391199146981626, "calibration/coverage@20%": 0.8946030183727034, "calibration/coverage@25%": 0.9744627624671915, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.14584153543307088, "calibration/distribution_entropy_10": 0.9546107782096881, "calibration/distribution_entropy_100": 0.9485610508453283, "calibration/ece": 0.18004387381069556, "calibration/mean_confidence": 0.6024290353920604, "calibration/unique_confidence_per_question": 0.7791666666666667, "calibration/unique_confidences": 299.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0014756944444444641, "completions/max_length": 3931.6, "completions/max_terminated_length": 3931.6, "completions/mean_length": 1719.3753662109375, "completions/mean_terminated_length": 1721.9231201171874, "completions/min_length": 0.0, "completions/min_terminated_length": 465.6, "epoch": 2.3191960100498745, "grad_norm": 0.00238273898139596, "learning_rate": 4.507211538461539e-07, "loss": -0.0002, "num_tokens": 2878891805.0, "reward": 1.0996188402175904, "reward_std": 0.09667720943689347, "rewards/accuracy_reward": 0.7565104246139527, "rewards/brier_reward": 0.8399082064628601, "rewards/confidence_uniqueness_reward": 0.9473569512367248, "rewards/format_reward": 0.9984375, "rewards/frontier_coverage_0": 0.7498524904251098, "rewards/frontier_coverage_1": 0.7498524904251098, "rewards/frontier_coverage_10": 0.7498524904251098, "rewards/frontier_coverage_15": 0.7498524904251098, "rewards/frontier_coverage_20": 0.7498524904251098, "rewards/frontier_coverage_25": 0.7498524904251098, "rewards/frontier_coverage_5": 0.7498524904251098, "rewards/frontier_entropy_batch_reward": -0.31641929149627684, "signal/accuracy_reward/centered_abs_mean": 0.10496419221162796, "signal/accuracy_reward/group_std_mean": 0.14212748557329177, "signal/accuracy_reward/group_zero_std_frac": 0.5777777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8967318654060363, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05248209610581398, "signal/advantage_abs_mean": 0.7712378382682801, "signal/advantage_pre_scale_abs_mean": 0.07349895536899567, "signal/advantage_pre_scale_std": 0.1213487908244133, "signal/advantage_std": 0.9827686429023743, "signal/brier_reward/centered_abs_mean": 0.10113995522260666, "signal/brier_reward/group_std_mean": 0.1298495277762413, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17362068593502045, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01011399570852518, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.017154244333505632, "signal/confidence_uniqueness_reward/group_std_mean": 0.02381323203444481, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02954300418496132, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0017154245171695948, "signal/format_reward/centered_abs_mean": 0.002907986077480018, "signal/format_reward/group_std_mean": 0.006289407191798091, "signal/format_reward/group_zero_std_frac": 0.9722222208976745, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.024624919146299364, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.001453993038740009, "signal/frontier_coverage_0/centered_abs_mean": 0.14556353986263276, "signal/frontier_coverage_0/group_std_mean": 0.18044343292713166, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03574709594249725, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002081558620557189, "signal/frontier_coverage_1/centered_abs_mean": 0.14556353986263276, "signal/frontier_coverage_1/group_std_mean": 0.18044343292713166, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03574709594249725, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002081558620557189, "signal/frontier_coverage_10/centered_abs_mean": 0.14556353986263276, "signal/frontier_coverage_10/group_std_mean": 0.18044343292713166, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03574709594249725, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002081558620557189, "signal/frontier_coverage_15/centered_abs_mean": 0.14556353986263276, "signal/frontier_coverage_15/group_std_mean": 0.18044343292713166, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03574709594249725, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002081558620557189, "signal/frontier_coverage_20/centered_abs_mean": 0.14556353986263276, "signal/frontier_coverage_20/group_std_mean": 0.18044343292713166, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03574709594249725, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002081558620557189, "signal/frontier_coverage_25/centered_abs_mean": 0.14556353986263276, "signal/frontier_coverage_25/group_std_mean": 0.18044343292713166, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03574709594249725, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002081558620557189, "signal/frontier_coverage_5/centered_abs_mean": 0.14556353986263276, "signal/frontier_coverage_5/group_std_mean": 0.18044343292713166, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03574709594249725, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002081558620557189, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32960216999053954, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3915234744548798, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5683565855026245, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03296021893620491, "step": 965 }, { "calibration/aurc": 0.11166700871292294, "calibration/batch_distribution_entropy": 0.9278303770940528, "calibration/batch_entropy_100bins": 0.9340343879809385, "calibration/batch_entropy_10bins": 0.9278303770940528, "calibration/batch_entropy_50bins": 0.9419337319119098, "calibration/batch_uniqueness": 0.9451849405959892, "calibration/confidence_entropy": 0.468146193171627, "calibration/coverage@0%": 0.09965051131418624, "calibration/coverage@1%": 0.25905814838990426, "calibration/coverage@10%": 0.6087140992167102, "calibration/coverage@15%": 0.6899926566579635, "calibration/coverage@20%": 0.8051362597911227, "calibration/coverage@25%": 0.9020833333333332, "calibration/coverage@30%": 0.9364583333333334, "calibration/coverage@5%": 0.35659812880765884, "calibration/distribution_entropy_10": 0.9278303770940528, "calibration/distribution_entropy_100": 0.9340343879809385, "calibration/ece": 0.14054123814186253, "calibration/mean_confidence": 0.6327841136858138, "calibration/unique_confidence_per_question": 0.78125, "calibration/unique_confidences": 300.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00546875, "completions/max_length": 4001.6, "completions/max_terminated_length": 4001.6, "completions/mean_length": 1679.43603515625, "completions/mean_terminated_length": 1688.64921875, "completions/min_length": 0.0, "completions/min_terminated_length": 607.6, "epoch": 2.3311958600517495, "grad_norm": 0.0024200736079365015, "learning_rate": 4.20673076923077e-07, "loss": -0.0136, "num_tokens": 2901341820.0, "reward": 1.0947325229644775, "reward_std": 0.0987067922949791, "rewards/accuracy_reward": 0.7580729126930237, "rewards/brier_reward": 0.8338273406028748, "rewards/confidence_uniqueness_reward": 0.9415804147720337, "rewards/format_reward": 0.99453125, "rewards/frontier_coverage_0": 0.7456173777580262, "rewards/frontier_coverage_1": 0.7456173777580262, "rewards/frontier_coverage_10": 0.7456173777580262, "rewards/frontier_coverage_15": 0.7456173777580262, "rewards/frontier_coverage_20": 0.7456173777580262, "rewards/frontier_coverage_25": 0.7456173777580262, "rewards/frontier_coverage_5": 0.7456173777580262, "rewards/frontier_entropy_batch_reward": -0.3374656796455383, "signal/accuracy_reward/centered_abs_mean": 0.10191514790058136, "signal/accuracy_reward/group_std_mean": 0.13759705126285554, "signal/accuracy_reward/group_zero_std_frac": 0.5944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9039199709892273, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05095757395029068, "signal/advantage_abs_mean": 0.7747997283935547, "signal/advantage_pre_scale_abs_mean": 0.07487674057483673, "signal/advantage_pre_scale_std": 0.12937503159046174, "signal/advantage_std": 0.9827169299125671, "signal/brier_reward/centered_abs_mean": 0.10510815382003784, "signal/brier_reward/group_std_mean": 0.13565416336059571, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18632941842079162, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010510815307497978, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022095327824354173, "signal/confidence_uniqueness_reward/group_std_mean": 0.030155374109745024, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03923738077282905, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002209532866254449, "signal/format_reward/centered_abs_mean": 0.00778537318110466, "signal/format_reward/group_std_mean": 0.012528749741613865, "signal/format_reward/group_zero_std_frac": 0.9555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06878926306962967, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00389268659055233, "signal/frontier_coverage_0/centered_abs_mean": 0.14680063724517822, "signal/frontier_coverage_0/group_std_mean": 0.18313476145267488, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03721518889069557, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002099249139428139, "signal/frontier_coverage_1/centered_abs_mean": 0.14680063724517822, "signal/frontier_coverage_1/group_std_mean": 0.18313476145267488, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03721518889069557, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002099249139428139, "signal/frontier_coverage_10/centered_abs_mean": 0.14680063724517822, "signal/frontier_coverage_10/group_std_mean": 0.18313476145267488, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.03721518889069557, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.002099249139428139, "signal/frontier_coverage_15/centered_abs_mean": 0.14680063724517822, "signal/frontier_coverage_15/group_std_mean": 0.18313476145267488, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03721518889069557, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002099249139428139, "signal/frontier_coverage_20/centered_abs_mean": 0.14680063724517822, "signal/frontier_coverage_20/group_std_mean": 0.18313476145267488, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03721518889069557, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002099249139428139, "signal/frontier_coverage_25/centered_abs_mean": 0.14680063724517822, "signal/frontier_coverage_25/group_std_mean": 0.18313476145267488, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03721518889069557, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002099249139428139, "signal/frontier_coverage_5/centered_abs_mean": 0.14680063724517822, "signal/frontier_coverage_5/group_std_mean": 0.18313476145267488, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03721518889069557, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002099249139428139, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3281488955020905, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39026449918746947, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5830213069915772, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03281489051878452, "step": 970 }, { "calibration/aurc": 0.048921450858412416, "calibration/batch_distribution_entropy": 0.9410208167580578, "calibration/batch_entropy_100bins": 0.9465772863304492, "calibration/batch_entropy_10bins": 0.9410208167580578, "calibration/batch_entropy_50bins": 0.9525928211916714, "calibration/batch_uniqueness": 0.9487509328672419, "calibration/buffer_distribution_entropy": 0.9602970568035275, "calibration/buffer_entropy_100bins": 0.9785693215386517, "calibration/buffer_entropy_10bins": 0.9602970568035275, "calibration/buffer_entropy_50bins": 0.9755793518739612, "calibration/confidence_entropy": 0.47804082687512295, "calibration/coverage@0%": 0.19668869669277633, "calibration/coverage@1%": 0.3411730308964317, "calibration/coverage@10%": 0.8179952676240209, "calibration/coverage@15%": 0.898815546127067, "calibration/coverage@20%": 0.9614066579634464, "calibration/coverage@25%": 0.9921834203655353, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.7199507724107919, "calibration/distribution_entropy_10": 0.9410208167580578, "calibration/distribution_entropy_100": 0.9465772863304492, "calibration/ece": 0.16123091737296644, "calibration/mean_confidence": 0.6316424365869987, "calibration/unique_confidence_per_question": 0.7776041666666667, "calibration/unique_confidences": 298.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003125, "completions/max_length": 4034.8, "completions/max_terminated_length": 4034.8, "completions/mean_length": 1692.332421875, "completions/mean_terminated_length": 1697.679736328125, "completions/min_length": 0.0, "completions/min_terminated_length": 541.2, "epoch": 2.3431957100536245, "grad_norm": 0.002366076922044158, "learning_rate": 3.90625e-07, "loss": -0.0098, "num_tokens": 2923899633.0, "reward": 1.0969661951065064, "reward_std": 0.09302805066108703, "rewards/accuracy_reward": 0.7771701335906982, "rewards/brier_reward": 0.8628766059875488, "rewards/confidence_uniqueness_reward": 0.943256139755249, "rewards/format_reward": 0.996875, "rewards/frontier_coverage_0": 0.6225879438221454, "rewards/frontier_coverage_1": 0.6225879438221454, "rewards/frontier_coverage_10": 0.639605250954628, "rewards/frontier_coverage_15": 0.6584436506032944, "rewards/frontier_coverage_20": 0.6852690279483795, "rewards/frontier_coverage_25": 0.7049791038036346, "rewards/frontier_coverage_5": 0.6253482840955258, "rewards/frontier_entropy_batch_reward": -0.35860825181007383, "signal/accuracy_reward/centered_abs_mean": 0.0921603724360466, "signal/accuracy_reward/group_std_mean": 0.12776372581720352, "signal/accuracy_reward/group_zero_std_frac": 0.6138888835906983, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8368605613708496, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0460801862180233, "signal/advantage_abs_mean": 0.7728834629058838, "signal/advantage_pre_scale_abs_mean": 0.06947359591722488, "signal/advantage_pre_scale_std": 0.12066877037286758, "signal/advantage_std": 0.9826517581939698, "signal/brier_reward/centered_abs_mean": 0.09350483268499374, "signal/brier_reward/group_std_mean": 0.12106747925281525, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1714227616786957, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009350483864545822, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020120499655604362, "signal/confidence_uniqueness_reward/group_std_mean": 0.02889779768884182, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03697131425142288, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002012050012126565, "signal/format_reward/centered_abs_mean": 0.005305989552289248, "signal/format_reward/group_std_mean": 0.010686865262687206, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.048372025787830356, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002652994776144624, "signal/frontier_coverage_0/centered_abs_mean": 0.134758859872818, "signal/frontier_coverage_0/group_std_mean": 0.16941750347614287, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03526972904801369, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019270517397671938, "signal/frontier_coverage_1/centered_abs_mean": 0.134758859872818, "signal/frontier_coverage_1/group_std_mean": 0.16941750347614287, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03526972904801369, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019270517397671938, "signal/frontier_coverage_10/centered_abs_mean": 0.12533134520053862, "signal/frontier_coverage_10/group_std_mean": 0.15608875453472137, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.032518448680639266, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0017922382801771164, "signal/frontier_coverage_15/centered_abs_mean": 0.13210234493017198, "signal/frontier_coverage_15/group_std_mean": 0.16414720118045806, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.03449446447193623, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0018890635343268513, "signal/frontier_coverage_20/centered_abs_mean": 0.14086658358573914, "signal/frontier_coverage_20/group_std_mean": 0.17497550547122956, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03705217763781547, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020143922185525296, "signal/frontier_coverage_25/centered_abs_mean": 0.14678715467453002, "signal/frontier_coverage_25/group_std_mean": 0.182331645488739, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.03878000974655151, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0020990563789382577, "signal/frontier_coverage_5/centered_abs_mean": 0.12934952080249787, "signal/frontier_coverage_5/group_std_mean": 0.1625670924782753, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03369109369814396, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018496982054784894, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3399089515209198, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40003854036331177, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6255879998207092, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03399089723825455, "step": 975 }, { "calibration/aurc": 0.14298997154471915, "calibration/batch_distribution_entropy": 0.9500791369676123, "calibration/batch_entropy_100bins": 0.9463294097352252, "calibration/batch_entropy_10bins": 0.9500791369676123, "calibration/batch_entropy_50bins": 0.9563625427542449, "calibration/batch_uniqueness": 0.9484823954659024, "calibration/buffer_distribution_entropy": 0.9642788407621218, "calibration/buffer_entropy_100bins": 0.9807640451660106, "calibration/buffer_entropy_10bins": 0.9642788407621218, "calibration/buffer_entropy_50bins": 0.978117026514248, "calibration/confidence_entropy": 0.47567245891365834, "calibration/coverage@0%": 0.10758928571428572, "calibration/coverage@1%": 0.22373511904761903, "calibration/coverage@10%": 0.40436507936507937, "calibration/coverage@15%": 0.5170552248677248, "calibration/coverage@20%": 0.6459656084656085, "calibration/coverage@25%": 0.8712136243386244, "calibration/coverage@30%": 0.9302910052910054, "calibration/coverage@5%": 0.3556878306878307, "calibration/distribution_entropy_10": 0.9500791369676123, "calibration/distribution_entropy_100": 0.9463294097352252, "calibration/ece": 0.15938840153769837, "calibration/mean_confidence": 0.5839293696263227, "calibration/unique_confidence_per_question": 0.7890625000000001, "calibration/unique_confidences": 303.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003645833333333348, "completions/max_length": 4007.8, "completions/max_terminated_length": 4007.8, "completions/mean_length": 1795.9944580078125, "completions/mean_terminated_length": 1802.4992919921874, "completions/min_length": 0.0, "completions/min_terminated_length": 569.6, "epoch": 2.3551955600554995, "grad_norm": 0.0023362021893262863, "learning_rate": 3.6057692307692306e-07, "loss": -0.0079, "num_tokens": 2947693457.0, "reward": 1.024513053894043, "reward_std": 0.09492753744125366, "rewards/accuracy_reward": 0.7426215291023255, "rewards/brier_reward": 0.8439577341079711, "rewards/confidence_uniqueness_reward": 0.9420446038246155, "rewards/format_reward": 0.9963541626930237, "rewards/frontier_coverage_0": 0.01960038566030562, "rewards/frontier_coverage_1": 0.01960038566030562, "rewards/frontier_coverage_10": 0.0827787920832634, "rewards/frontier_coverage_15": 0.15930041372776033, "rewards/frontier_coverage_20": 0.2687810301780701, "rewards/frontier_coverage_25": 0.3554062366485596, "rewards/frontier_coverage_5": 0.022010414488613607, "rewards/frontier_entropy_batch_reward": -0.36837931871414187, "signal/accuracy_reward/centered_abs_mean": 0.09831271767616272, "signal/accuracy_reward/group_std_mean": 0.13480441719293595, "signal/accuracy_reward/group_zero_std_frac": 0.5944444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.873695683479309, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04915635883808136, "signal/advantage_abs_mean": 0.7633371233940125, "signal/advantage_pre_scale_abs_mean": 0.07063713744282722, "signal/advantage_pre_scale_std": 0.12072601765394211, "signal/advantage_std": 0.9826791763305665, "signal/brier_reward/centered_abs_mean": 0.10119090974330902, "signal/brier_reward/group_std_mean": 0.13194870799779893, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18225338459014892, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010119091346859932, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021581395342946053, "signal/confidence_uniqueness_reward/group_std_mean": 0.030861319229006767, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03918079622089863, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002158139576204121, "signal/format_reward/centered_abs_mean": 0.006477864505723119, "signal/format_reward/group_std_mean": 0.012327943369746209, "signal/format_reward/group_zero_std_frac": 0.950000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05790370739996433, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0032389322528615593, "signal/frontier_coverage_0/centered_abs_mean": 0.13417006731033326, "signal/frontier_coverage_0/group_std_mean": 0.17341846227645874, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.034544138610363005, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019186319317668677, "signal/frontier_coverage_1/centered_abs_mean": 0.13417006731033326, "signal/frontier_coverage_1/group_std_mean": 0.17341846227645874, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.034544138610363005, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019186319317668677, "signal/frontier_coverage_10/centered_abs_mean": 0.06774536669254302, "signal/frontier_coverage_10/group_std_mean": 0.08314146995544433, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.0175961634144187, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009687587735243142, "signal/frontier_coverage_15/centered_abs_mean": 0.09616223573684693, "signal/frontier_coverage_15/group_std_mean": 0.11858321875333785, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024944596737623215, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013751199934631587, "signal/frontier_coverage_20/centered_abs_mean": 0.13780849874019624, "signal/frontier_coverage_20/group_std_mean": 0.17135028541088104, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.035683315992355344, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019706616178154944, "signal/frontier_coverage_25/centered_abs_mean": 0.17244452834129334, "signal/frontier_coverage_25/group_std_mean": 0.21541389226913452, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04460631459951401, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.00246595679782331, "signal/frontier_coverage_5/centered_abs_mean": 0.11228355765342712, "signal/frontier_coverage_5/group_std_mean": 0.14591508209705353, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.028914512321352958, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016056548804044724, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33653807640075684, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3984680354595184, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6106127738952637, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03365381136536598, "step": 980 }, { "calibration/aurc": 0.0753109634174734, "calibration/batch_distribution_entropy": 0.9480128859763612, "calibration/batch_entropy_100bins": 0.9484348062409662, "calibration/batch_entropy_10bins": 0.9480128859763612, "calibration/batch_entropy_50bins": 0.9575373289371351, "calibration/batch_uniqueness": 0.948753060265835, "calibration/buffer_distribution_entropy": 0.9667386152705809, "calibration/buffer_entropy_100bins": 0.9821239026124247, "calibration/buffer_entropy_10bins": 0.9667386152705809, "calibration/buffer_entropy_50bins": 0.9797207122682059, "calibration/confidence_entropy": 0.4779945092334944, "calibration/coverage@0%": 0.23802083333333335, "calibration/coverage@1%": 0.2598958333333333, "calibration/coverage@10%": 0.6843192449956484, "calibration/coverage@15%": 0.886958768494343, "calibration/coverage@20%": 0.934375, "calibration/coverage@25%": 0.9619791666666668, "calibration/coverage@30%": 0.9765625, "calibration/coverage@5%": 0.5290279590948651, "calibration/distribution_entropy_10": 0.9480128859763612, "calibration/distribution_entropy_100": 0.9484348062409662, "calibration/ece": 0.19405203231070495, "calibration/mean_confidence": 0.6091119354873803, "calibration/unique_confidence_per_question": 0.7791666666666666, "calibration/unique_confidences": 299.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0026041666666666964, "completions/max_length": 3950.8, "completions/max_terminated_length": 3950.8, "completions/mean_length": 1796.334765625, "completions/mean_terminated_length": 1801.1148193359375, "completions/min_length": 130.6, "completions/min_terminated_length": 563.8, "epoch": 2.3671954100573744, "grad_norm": 0.0022584237158298492, "learning_rate": 3.305288461538462e-07, "loss": -0.0057, "num_tokens": 2971492257.0, "reward": 1.0601597785949708, "reward_std": 0.08923779428005219, "rewards/accuracy_reward": 0.8031249880790711, "rewards/brier_reward": 0.8480790615081787, "rewards/confidence_uniqueness_reward": 0.9452744245529174, "rewards/format_reward": 0.9973958253860473, "rewards/frontier_coverage_0": -0.015677616419270634, "rewards/frontier_coverage_1": -0.015677616419270634, "rewards/frontier_coverage_10": 0.08821566551923751, "rewards/frontier_coverage_15": 0.17647169828414916, "rewards/frontier_coverage_20": 0.2998010993003845, "rewards/frontier_coverage_25": 0.39475311040878297, "rewards/frontier_coverage_5": -0.002906990051269531, "rewards/frontier_entropy_batch_reward": -0.32663238048553467, "signal/accuracy_reward/centered_abs_mean": 0.09223090261220931, "signal/accuracy_reward/group_std_mean": 0.12533488720655442, "signal/accuracy_reward/group_zero_std_frac": 0.6194444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8646474599838256, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04611545130610466, "signal/advantage_abs_mean": 0.7746138095855712, "signal/advantage_pre_scale_abs_mean": 0.06815963685512542, "signal/advantage_pre_scale_std": 0.11540376543998718, "signal/advantage_std": 0.9826080322265625, "signal/brier_reward/centered_abs_mean": 0.09722411036491393, "signal/brier_reward/group_std_mean": 0.12465722560882568, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18319908082485198, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009722411073744297, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.018586510978639125, "signal/confidence_uniqueness_reward/group_std_mean": 0.0264458317309618, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.034793031960725786, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018586511258035898, "signal/format_reward/centered_abs_mean": 0.0044596354942768816, "signal/format_reward/group_std_mean": 0.00908562783151865, "signal/format_reward/group_zero_std_frac": 0.9583333373069763, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04040036499500275, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0022298177471384408, "signal/frontier_coverage_0/centered_abs_mean": 0.13336831480264663, "signal/frontier_coverage_0/group_std_mean": 0.17140342593193053, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.035881773382425305, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019071669783443212, "signal/frontier_coverage_1/centered_abs_mean": 0.13336831480264663, "signal/frontier_coverage_1/group_std_mean": 0.17140342593193053, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035881773382425305, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019071669783443212, "signal/frontier_coverage_10/centered_abs_mean": 0.0660796619951725, "signal/frontier_coverage_10/group_std_mean": 0.08096126317977906, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017853300645947458, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009449392091482878, "signal/frontier_coverage_15/centered_abs_mean": 0.09556379914283752, "signal/frontier_coverage_15/group_std_mean": 0.11702702194452286, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.025842766091227532, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013665623031556607, "signal/frontier_coverage_20/centered_abs_mean": 0.13735153675079345, "signal/frontier_coverage_20/group_std_mean": 0.1688213050365448, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03713957220315933, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001964126992970705, "signal/frontier_coverage_25/centered_abs_mean": 0.16979779601097106, "signal/frontier_coverage_25/group_std_mean": 0.2093061089515686, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04587937220931053, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002428108500316739, "signal/frontier_coverage_5/centered_abs_mean": 0.11376264691352844, "signal/frontier_coverage_5/group_std_mean": 0.14681884348392488, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.030614623427391054, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016268058447167278, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3292146623134613, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39027782082557677, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6217843651771545, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03292146697640419, "step": 985 }, { "calibration/aurc": 0.058988793729192454, "calibration/batch_distribution_entropy": 0.9364811635529179, "calibration/batch_entropy_100bins": 0.9394896616791195, "calibration/batch_entropy_10bins": 0.9364811635529179, "calibration/batch_entropy_50bins": 0.9464707680021032, "calibration/batch_uniqueness": 0.9467230902777777, "calibration/buffer_distribution_entropy": 0.9667519942725857, "calibration/buffer_entropy_100bins": 0.9823825593852069, "calibration/buffer_entropy_10bins": 0.9667519942725857, "calibration/buffer_entropy_50bins": 0.9798715333534596, "calibration/confidence_entropy": 0.4910575180548623, "calibration/coverage@0%": 0.24322916666666666, "calibration/coverage@1%": 0.3567708333333333, "calibration/coverage@10%": 0.8140625, "calibration/coverage@15%": 0.8854166666666667, "calibration/coverage@20%": 0.9416666666666668, "calibration/coverage@25%": 0.9979166666666668, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5692708333333333, "calibration/distribution_entropy_10": 0.9364811635529179, "calibration/distribution_entropy_100": 0.9394896616791195, "calibration/ece": 0.20949534375, "calibration/mean_confidence": 0.6143158645833333, "calibration/unique_confidence_per_question": 0.7822916666666667, "calibration/unique_confidences": 300.4, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002777777777777746, "completions/max_length": 4015.6, "completions/max_terminated_length": 4015.6, "completions/mean_length": 1833.0742431640624, "completions/mean_terminated_length": 1838.1804443359374, "completions/min_length": 0.0, "completions/min_terminated_length": 760.0, "epoch": 2.3791952600592494, "grad_norm": 0.002287256298586726, "learning_rate": 3.0048076923076924e-07, "loss": -0.0074, "num_tokens": 2995705528.0, "reward": 1.0458185434341432, "reward_std": 0.09111074954271317, "rewards/accuracy_reward": 0.7794270873069763, "rewards/brier_reward": 0.8258747100830078, "rewards/confidence_uniqueness_reward": 0.9460436582565308, "rewards/format_reward": 0.9972222208976745, "rewards/frontier_coverage_0": -0.020203251019120218, "rewards/frontier_coverage_1": -0.020203251019120218, "rewards/frontier_coverage_10": 0.08284911960363388, "rewards/frontier_coverage_15": 0.17000916302204133, "rewards/frontier_coverage_20": 0.2834066033363342, "rewards/frontier_coverage_25": 0.35660980343818666, "rewards/frontier_coverage_5": -0.003650544723495841, "rewards/frontier_entropy_batch_reward": -0.3183602750301361, "signal/accuracy_reward/centered_abs_mean": 0.0955023854970932, "signal/accuracy_reward/group_std_mean": 0.13166273087263108, "signal/accuracy_reward/group_zero_std_frac": 0.600000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8537281632423401, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0477511927485466, "signal/advantage_abs_mean": 0.7701890587806701, "signal/advantage_pre_scale_abs_mean": 0.06861250698566437, "signal/advantage_pre_scale_std": 0.11559868305921554, "signal/advantage_std": 0.9826963424682618, "signal/brier_reward/centered_abs_mean": 0.10571289658546448, "signal/brier_reward/group_std_mean": 0.13531849682331085, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18983431458473204, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010571289993822574, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01879030391573906, "signal/confidence_uniqueness_reward/group_std_mean": 0.0264260970056057, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03378798738121987, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018790304427966475, "signal/format_reward/centered_abs_mean": 0.004947916697710752, "signal/format_reward/group_std_mean": 0.009209575690329075, "signal/format_reward/group_zero_std_frac": 0.9638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0445366695523262, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002473958348855376, "signal/frontier_coverage_0/centered_abs_mean": 0.14547624588012695, "signal/frontier_coverage_0/group_std_mean": 0.18774136900901794, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03729048147797585, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020803103223443033, "signal/frontier_coverage_1/centered_abs_mean": 0.14547624588012695, "signal/frontier_coverage_1/group_std_mean": 0.18774136900901794, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03729048147797585, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020803103223443033, "signal/frontier_coverage_10/centered_abs_mean": 0.06864608377218247, "signal/frontier_coverage_10/group_std_mean": 0.08440887182950974, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01762332357466221, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009816389763727784, "signal/frontier_coverage_15/centered_abs_mean": 0.09738789051771164, "signal/frontier_coverage_15/group_std_mean": 0.12021335512399674, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02502138651907444, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.001392646855674684, "signal/frontier_coverage_20/centered_abs_mean": 0.13724444508552552, "signal/frontier_coverage_20/group_std_mean": 0.1709604889154434, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03526832312345505, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019625954795628787, "signal/frontier_coverage_25/centered_abs_mean": 0.16329463422298432, "signal/frontier_coverage_25/group_std_mean": 0.20417420566082, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04194674119353294, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023351131472736596, "signal/frontier_coverage_5/centered_abs_mean": 0.11565566658973694, "signal/frontier_coverage_5/group_std_mean": 0.15015834867954253, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029612866416573524, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.001653875899501145, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3260919272899628, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39335213899612426, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5854568719863892, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326091930270195, "step": 990 }, { "calibration/aurc": 0.14222156184323334, "calibration/batch_distribution_entropy": 0.9415694556389482, "calibration/batch_entropy_100bins": 0.9432832874756203, "calibration/batch_entropy_10bins": 0.9415694556389482, "calibration/batch_entropy_50bins": 0.9505710303745625, "calibration/batch_uniqueness": 0.947582376897533, "calibration/buffer_distribution_entropy": 0.9669185277208856, "calibration/buffer_entropy_100bins": 0.9825735776521283, "calibration/buffer_entropy_10bins": 0.9669185277208856, "calibration/buffer_entropy_50bins": 0.9799605007398385, "calibration/confidence_entropy": 0.4812596197164378, "calibration/coverage@0%": 0.2971660139251523, "calibration/coverage@1%": 0.33005330722367276, "calibration/coverage@10%": 0.4866677545691907, "calibration/coverage@15%": 0.5754011640557005, "calibration/coverage@20%": 0.7131187445604874, "calibration/coverage@25%": 0.8065437336814621, "calibration/coverage@30%": 0.8456851066144473, "calibration/coverage@5%": 0.3843274042645779, "calibration/distribution_entropy_10": 0.9415694556389482, "calibration/distribution_entropy_100": 0.9432832874756203, "calibration/ece": 0.1949963851854874, "calibration/mean_confidence": 0.5996266332272628, "calibration/unique_confidence_per_question": 0.7765625, "calibration/unique_confidences": 298.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002951388888888862, "completions/max_length": 4036.0, "completions/max_terminated_length": 4036.0, "completions/mean_length": 1861.501123046875, "completions/mean_terminated_length": 1867.0845458984375, "completions/min_length": 0.0, "completions/min_terminated_length": 724.4, "epoch": 2.3911951100611244, "grad_norm": 0.0023452253080904484, "learning_rate": 2.7043269230769233e-07, "loss": -0.0073, "num_tokens": 3020283909.0, "reward": 1.0259827852249146, "reward_std": 0.09166586697101593, "rewards/accuracy_reward": 0.7354166626930236, "rewards/brier_reward": 0.830725634098053, "rewards/confidence_uniqueness_reward": 0.9464214205741882, "rewards/format_reward": 0.9970486044883728, "rewards/frontier_coverage_0": 0.018453091010451318, "rewards/frontier_coverage_1": 0.018490078300237654, "rewards/frontier_coverage_10": 0.08058418780565262, "rewards/frontier_coverage_15": 0.15889862924814224, "rewards/frontier_coverage_20": 0.26112279295921326, "rewards/frontier_coverage_25": 0.33524413108825685, "rewards/frontier_coverage_5": 0.021506571979261934, "rewards/frontier_entropy_batch_reward": -0.30753050446510316, "signal/accuracy_reward/centered_abs_mean": 0.0952799454331398, "signal/accuracy_reward/group_std_mean": 0.12790547311306, "signal/accuracy_reward/group_zero_std_frac": 0.6194444537162781, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.855156683921814, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0476399727165699, "signal/advantage_abs_mean": 0.7717325329780579, "signal/advantage_pre_scale_abs_mean": 0.0698886714875698, "signal/advantage_pre_scale_std": 0.11719027608633041, "signal/advantage_std": 0.9826955795288086, "signal/brier_reward/centered_abs_mean": 0.10128604173660279, "signal/brier_reward/group_std_mean": 0.1313249558210373, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18230506181716918, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010128603875637054, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01827959679067135, "signal/confidence_uniqueness_reward/group_std_mean": 0.026158673316240312, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03279260098934174, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018279596930369734, "signal/format_reward/centered_abs_mean": 0.0046115451375953855, "signal/format_reward/group_std_mean": 0.009491527453064919, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04077572412788868, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0023057725687976927, "signal/frontier_coverage_0/centered_abs_mean": 0.1416991651058197, "signal/frontier_coverage_0/group_std_mean": 0.18187021017074584, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.036441127955913546, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020262979669496417, "signal/frontier_coverage_1/centered_abs_mean": 0.1416353702545166, "signal/frontier_coverage_1/group_std_mean": 0.1817883223295212, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03642522916197777, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020253857830539344, "signal/frontier_coverage_10/centered_abs_mean": 0.06796745508909226, "signal/frontier_coverage_10/group_std_mean": 0.08356752395629882, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017518576234579086, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000971934583503753, "signal/frontier_coverage_15/centered_abs_mean": 0.09602248519659043, "signal/frontier_coverage_15/group_std_mean": 0.11829137653112412, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024734945222735404, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.00137312151491642, "signal/frontier_coverage_20/centered_abs_mean": 0.13371631503105164, "signal/frontier_coverage_20/group_std_mean": 0.16593950092792512, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034411372244358064, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019121433142572641, "signal/frontier_coverage_25/centered_abs_mean": 0.16211245357990264, "signal/frontier_coverage_25/group_std_mean": 0.2016547739505768, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.041706757992506026, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002318208012729883, "signal/frontier_coverage_5/centered_abs_mean": 0.12877790927886962, "signal/frontier_coverage_5/group_std_mean": 0.16575416028499604, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03314310386776924, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0018415240803733468, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33054853081703184, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39594523310661317, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5939931273460388, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03305485397577286, "step": 995 }, { "calibration/aurc": 0.07342741644024595, "calibration/batch_distribution_entropy": 0.9301535869855403, "calibration/batch_entropy_100bins": 0.9386370711182925, "calibration/batch_entropy_10bins": 0.9301535869855403, "calibration/batch_entropy_50bins": 0.9463011690348649, "calibration/batch_uniqueness": 0.9467597241919155, "calibration/buffer_distribution_entropy": 0.9674407780157438, "calibration/buffer_entropy_100bins": 0.9828415632458573, "calibration/buffer_entropy_10bins": 0.9674407780157438, "calibration/buffer_entropy_50bins": 0.9802421205624169, "calibration/confidence_entropy": 0.479119943513154, "calibration/coverage@0%": 0.16233956684165451, "calibration/coverage@1%": 0.2358736181906536, "calibration/coverage@10%": 0.7203064184228933, "calibration/coverage@15%": 0.8644619795381985, "calibration/coverage@20%": 0.9070068389246331, "calibration/coverage@25%": 0.9457938968668408, "calibration/coverage@30%": 0.9781222802436901, "calibration/coverage@5%": 0.6136452662966068, "calibration/distribution_entropy_10": 0.9301535869855403, "calibration/distribution_entropy_100": 0.9386370711182925, "calibration/ece": 0.19834418927249478, "calibration/mean_confidence": 0.6189614354133265, "calibration/unique_confidence_per_question": 0.7708333333333334, "calibration/unique_confidences": 296.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004166666666666674, "completions/max_length": 3995.6, "completions/max_terminated_length": 3995.6, "completions/mean_length": 1884.9341064453124, "completions/mean_terminated_length": 1892.77158203125, "completions/min_length": 0.0, "completions/min_terminated_length": 643.8, "epoch": 2.4031949600629994, "grad_norm": 0.0023564172443002462, "learning_rate": 2.403846153846154e-07, "loss": -0.0104, "num_tokens": 3045098126.0, "reward": 1.050121831893921, "reward_std": 0.09569599479436874, "rewards/accuracy_reward": 0.7814236044883728, "rewards/brier_reward": 0.8372802495956421, "rewards/confidence_uniqueness_reward": 0.9458718299865723, "rewards/format_reward": 0.9958333373069763, "rewards/frontier_coverage_0": -0.010130425938405097, "rewards/frontier_coverage_1": -0.010074634104967117, "rewards/frontier_coverage_10": 0.08780712187290192, "rewards/frontier_coverage_15": 0.17775541841983794, "rewards/frontier_coverage_20": 0.2861971020698547, "rewards/frontier_coverage_25": 0.3673263430595398, "rewards/frontier_coverage_5": -0.002790766826365143, "rewards/frontier_entropy_batch_reward": -0.2963591665029526, "signal/accuracy_reward/centered_abs_mean": 0.09832899421453475, "signal/accuracy_reward/group_std_mean": 0.13242024779319764, "signal/accuracy_reward/group_zero_std_frac": 0.6111111223697663, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8578180551528931, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04916449710726738, "signal/advantage_abs_mean": 0.770215654373169, "signal/advantage_pre_scale_abs_mean": 0.07169848531484604, "signal/advantage_pre_scale_std": 0.12310145199298858, "signal/advantage_std": 0.982709777355194, "signal/brier_reward/centered_abs_mean": 0.09941399097442627, "signal/brier_reward/group_std_mean": 0.12858400940895082, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17628815174102783, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009941398911178113, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02001577503979206, "signal/confidence_uniqueness_reward/group_std_mean": 0.03095482215285301, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.036119457334280014, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002001577545888722, "signal/format_reward/centered_abs_mean": 0.007183159794658422, "signal/format_reward/group_std_mean": 0.015246148407459258, "signal/format_reward/group_zero_std_frac": 0.9305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06603498458862304, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003591579897329211, "signal/frontier_coverage_0/centered_abs_mean": 0.13947281688451768, "signal/frontier_coverage_0/group_std_mean": 0.1804393172264099, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03531498908996582, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001994461310096085, "signal/frontier_coverage_1/centered_abs_mean": 0.13939008265733718, "signal/frontier_coverage_1/group_std_mean": 0.18033737540245057, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.035294461995363235, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001993278227746487, "signal/frontier_coverage_10/centered_abs_mean": 0.06610210686922073, "signal/frontier_coverage_10/group_std_mean": 0.08138312101364135, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016847145184874533, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009452601661905646, "signal/frontier_coverage_15/centered_abs_mean": 0.09497750997543335, "signal/frontier_coverage_15/group_std_mean": 0.11707037836313247, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.024168269336223604, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0013581784442067146, "signal/frontier_coverage_20/centered_abs_mean": 0.1326014831662178, "signal/frontier_coverage_20/group_std_mean": 0.1643354892730713, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0336851567029953, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0018962011905387043, "signal/frontier_coverage_25/centered_abs_mean": 0.16154046058654786, "signal/frontier_coverage_25/group_std_mean": 0.20086792409420012, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.0409718930721283, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002310028485953808, "signal/frontier_coverage_5/centered_abs_mean": 0.12495265901088715, "signal/frontier_coverage_5/group_std_mean": 0.16214902102947235, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03164008669555187, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017868230119347573, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32641816735267637, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3914418339729309, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5837355196475983, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0326418187469244, "step": 1000 }, { "epoch": 2.4031949600629994, "eval_completions/clipped_ratio": 0.004340277777777772, "eval_completions/max_length": 3791.6666666666665, "eval_completions/max_terminated_length": 3791.6666666666665, "eval_completions/mean_length": 1842.2066446940105, "eval_completions/mean_terminated_length": 1850.1665445963542, "eval_completions/min_length": 407.1666666666667, "eval_completions/min_terminated_length": 795.0, "eval_loss": 0.0, "eval_num_tokens": 3045098126.0, "eval_reward": 0.9361613194147745, "eval_reward_std": 0.23841848721106848, "eval_rewards/accuracy_reward": 0.7092013955116272, "eval_rewards/brier_reward": 0.8246038556098938, "eval_rewards/confidence_uniqueness_reward": 0.8930894037087759, "eval_rewards/format_reward": 0.9947916666666666, "eval_rewards/frontier_coverage_0": 0.01996202681524058, "eval_rewards/frontier_coverage_1": 0.020027826928223174, "eval_rewards/frontier_coverage_10": 0.07537480567892392, "eval_rewards/frontier_coverage_15": 0.14895516633987427, "eval_rewards/frontier_coverage_20": 0.23826486865679422, "eval_rewards/frontier_coverage_25": 0.30555500090122223, "eval_rewards/frontier_coverage_5": 0.02225215562308828, "eval_rewards/frontier_entropy_batch_reward": -0.9947916666666666, "eval_runtime": 220.4409, "eval_samples_per_second": 4.536, "eval_signal/accuracy_reward/centered_abs_mean": 0.3987087657054265, "eval_signal/accuracy_reward/group_std_mean": 0.4515012751022975, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8477271099885305, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.19935438285271326, "eval_signal/advantage_abs_mean": 0.856895645459493, "eval_signal/advantage_pre_scale_abs_mean": 0.20530925691127777, "eval_signal/advantage_pre_scale_std": 0.23668034126361212, "eval_signal/advantage_std": 0.9864044487476349, "eval_signal/brier_reward/centered_abs_mean": 0.16098289688428244, "eval_signal/brier_reward/group_std_mean": 0.21572668353716531, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06839290571709473, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.016098289905736845, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.045053947096069656, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06295228935778141, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.01909668557345867, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004505394880349438, "eval_signal/format_reward/centered_abs_mean": 0.009982638681928316, "eval_signal/format_reward/group_std_mean": 0.026473373795549076, "eval_signal/format_reward/group_zero_std_frac": 0.8611111342906952, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02075567903618018, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.004991319340964158, "eval_signal/frontier_coverage_0/centered_abs_mean": 0.2538191005587578, "eval_signal/frontier_coverage_0/group_std_mean": 0.3522955924272537, "eval_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015475187761088213, "eval_signal/frontier_coverage_0/weight": 0.014299999922513962, "eval_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0036296132020652294, "eval_signal/frontier_coverage_1/centered_abs_mean": 0.2536289890607198, "eval_signal/frontier_coverage_1/group_std_mean": 0.35204460720221203, "eval_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.015463725663721561, "eval_signal/frontier_coverage_1/weight": 0.014299999922513962, "eval_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003626894555054605, "eval_signal/frontier_coverage_10/centered_abs_mean": 0.08963375041882198, "eval_signal/frontier_coverage_10/group_std_mean": 0.11314565564195316, "eval_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.005451245854298274, "eval_signal/frontier_coverage_10/weight": 0.014299999922513962, "eval_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0012817625926497083, "eval_signal/frontier_coverage_15/centered_abs_mean": 0.17246426890293756, "eval_signal/frontier_coverage_15/group_std_mean": 0.21190873285134634, "eval_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.010484680533409119, "eval_signal/frontier_coverage_15/weight": 0.014299999922513962, "eval_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.002466239112739762, "eval_signal/frontier_coverage_20/centered_abs_mean": 0.2715127418438594, "eval_signal/frontier_coverage_20/group_std_mean": 0.33038956423600513, "eval_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.0165048170213898, "eval_signal/frontier_coverage_20/weight": 0.014299999922513962, "eval_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003882632163974146, "eval_signal/frontier_coverage_25/centered_abs_mean": 0.345558096965154, "eval_signal/frontier_coverage_25/group_std_mean": 0.4196178962786992, "eval_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.021006828794876736, "eval_signal/frontier_coverage_25/weight": 0.014299999922513962, "eval_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004941480699926615, "eval_signal/frontier_coverage_5/centered_abs_mean": 0.22168447573979697, "eval_signal/frontier_coverage_5/group_std_mean": 0.31111370027065277, "eval_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "eval_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.013517022288093964, "eval_signal/frontier_coverage_5/weight": 0.014299999922513962, "eval_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.003170088049955666, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.009982638681928316, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.026473373795549076, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8611111342906952, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004151135838280122, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0009982639458030462, "eval_steps_per_second": 0.027, "step": 1000 }, { "epoch": 2.4031949600629994, "step": 1000, "train_probe_completions/clipped_ratio": 0.00434027777777779, "train_probe_completions/max_length": 3769.3333333333335, "train_probe_completions/max_terminated_length": 3769.3333333333335, "train_probe_completions/mean_length": 1853.1814575195312, "train_probe_completions/mean_terminated_length": 1861.4977620442708, "train_probe_completions/min_length": 473.6666666666667, "train_probe_completions/min_terminated_length": 669.0, "train_probe_loss": 0.0, "train_probe_num_tokens": 3045098126.0, "train_probe_reward": 0.969299187262853, "train_probe_reward_std": 0.21884569774071375, "train_probe_rewards/accuracy_reward": 0.7690972288449606, "train_probe_rewards/brier_reward": 0.8417787551879883, "train_probe_rewards/confidence_uniqueness_reward": 0.8890791237354279, "train_probe_rewards/format_reward": 0.9956597288449606, "train_probe_rewards/frontier_coverage_0": -0.00047506617071727913, "train_probe_rewards/frontier_coverage_1": -0.00046507261383036774, "train_probe_rewards/frontier_coverage_10": 0.09062495206793149, "train_probe_rewards/frontier_coverage_15": 0.1806212936838468, "train_probe_rewards/frontier_coverage_20": 0.2890334626038869, "train_probe_rewards/frontier_coverage_25": 0.3692951550086339, "train_probe_rewards/frontier_coverage_5": 0.008488837241505584, "train_probe_rewards/frontier_entropy_batch_reward": -0.9956597288449606, "train_probe_runtime": 220.0235, "train_probe_samples_per_second": 4.545, "train_probe_signal/accuracy_reward/centered_abs_mean": 0.3460286458333333, "train_probe_signal/accuracy_reward/group_std_mean": 0.4203969786564509, "train_probe_signal/accuracy_reward/group_zero_std_frac": 0.0, "train_probe_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8049575587113699, "train_probe_signal/accuracy_reward/weight": 0.5, "train_probe_signal/accuracy_reward/weighted_centered_abs_mean": 0.17301432291666666, "train_probe_signal/advantage_abs_mean": 0.8016128440697988, "train_probe_signal/advantage_pre_scale_abs_mean": 0.17688036213318506, "train_probe_signal/advantage_pre_scale_std": 0.2180818368991216, "train_probe_signal/advantage_std": 0.9863627056280772, "train_probe_signal/brier_reward/centered_abs_mean": 0.14901412775119147, "train_probe_signal/brier_reward/group_std_mean": 0.20425448566675186, "train_probe_signal/brier_reward/group_zero_std_frac": 0.0, "train_probe_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.06965736175576846, "train_probe_signal/brier_reward/weight": 0.10000000149011612, "train_probe_signal/brier_reward/weighted_centered_abs_mean": 0.014901412961383661, "train_probe_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04799235612154007, "train_probe_signal/confidence_uniqueness_reward/group_std_mean": 0.06400722078979015, "train_probe_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "train_probe_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.022248809846738975, "train_probe_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "train_probe_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004799235767374436, "train_probe_signal/format_reward/centered_abs_mean": 0.008083767102410397, "train_probe_signal/format_reward/group_std_mean": 0.018047164815167587, "train_probe_signal/format_reward/group_zero_std_frac": 0.9166666766007742, "train_probe_signal/format_reward/scaled_weighted_centered_abs_mean": 0.017749311091999214, "train_probe_signal/format_reward/weight": 0.5, "train_probe_signal/format_reward/weighted_centered_abs_mean": 0.004041883551205198, "train_probe_signal/frontier_coverage_0/centered_abs_mean": 0.23642443617184958, "train_probe_signal/frontier_coverage_0/group_std_mean": 0.3478074073791504, "train_probe_signal/frontier_coverage_0/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.015785963740199804, "train_probe_signal/frontier_coverage_0/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0033808692436044416, "train_probe_signal/frontier_coverage_1/centered_abs_mean": 0.23625963926315308, "train_probe_signal/frontier_coverage_1/group_std_mean": 0.34759581089019775, "train_probe_signal/frontier_coverage_1/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.01577526455124219, "train_probe_signal/frontier_coverage_1/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_1/weighted_centered_abs_mean": 0.003378512842270235, "train_probe_signal/frontier_coverage_10/centered_abs_mean": 0.09172458325823148, "train_probe_signal/frontier_coverage_10/group_std_mean": 0.11395466451843579, "train_probe_signal/frontier_coverage_10/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.006129148804272215, "train_probe_signal/frontier_coverage_10/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001311661481546859, "train_probe_signal/frontier_coverage_15/centered_abs_mean": 0.170408862332503, "train_probe_signal/frontier_coverage_15/group_std_mean": 0.2058931663632393, "train_probe_signal/frontier_coverage_15/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.011379176595558723, "train_probe_signal/frontier_coverage_15/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0024368467663104334, "train_probe_signal/frontier_coverage_20/centered_abs_mean": 0.26180795580148697, "train_probe_signal/frontier_coverage_20/group_std_mean": 0.31553854048252106, "train_probe_signal/frontier_coverage_20/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.017479725182056427, "train_probe_signal/frontier_coverage_20/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_20/weighted_centered_abs_mean": 0.003743853730460008, "train_probe_signal/frontier_coverage_25/centered_abs_mean": 0.3305613547563553, "train_probe_signal/frontier_coverage_25/group_std_mean": 0.3989069660504659, "train_probe_signal/frontier_coverage_25/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.022070841242869694, "train_probe_signal/frontier_coverage_25/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_25/weighted_centered_abs_mean": 0.004727027301366131, "train_probe_signal/frontier_coverage_5/centered_abs_mean": 0.20856821288665137, "train_probe_signal/frontier_coverage_5/group_std_mean": 0.3089133898417155, "train_probe_signal/frontier_coverage_5/group_zero_std_frac": 0.0, "train_probe_signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.013924311380833387, "train_probe_signal/frontier_coverage_5/weight": 0.014299999922513962, "train_probe_signal/frontier_coverage_5/weighted_centered_abs_mean": 0.002982525465389093, "train_probe_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.008083767102410397, "train_probe_signal/frontier_entropy_batch_reward/group_std_mean": 0.018047164815167587, "train_probe_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.9166666766007742, "train_probe_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0035498624201864004, "train_probe_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "train_probe_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0008083767024800181, "train_probe_steps_per_second": 0.027 }, { "calibration/aurc": 0.049643029974327034, "calibration/batch_distribution_entropy": 0.9358887317593924, "calibration/batch_entropy_100bins": 0.9399941518547305, "calibration/batch_entropy_10bins": 0.9358887317593924, "calibration/batch_entropy_50bins": 0.9483285575618018, "calibration/batch_uniqueness": 0.9475488333796699, "calibration/buffer_distribution_entropy": 0.9649044556260031, "calibration/buffer_entropy_100bins": 0.9816726170018539, "calibration/buffer_entropy_10bins": 0.9649044556260031, "calibration/buffer_entropy_50bins": 0.9788240110362392, "calibration/confidence_entropy": 0.47691834991219684, "calibration/coverage@0%": 0.14653612571448732, "calibration/coverage@1%": 0.16585727453955262, "calibration/coverage@10%": 0.8903184908145743, "calibration/coverage@15%": 0.948948814186249, "calibration/coverage@20%": 0.9921875, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.6731427813257123, "calibration/distribution_entropy_10": 0.9358887317593924, "calibration/distribution_entropy_100": 0.9399941518547305, "calibration/ece": 0.2326867390651095, "calibration/mean_confidence": 0.6246838768994191, "calibration/unique_confidence_per_question": 0.7864583333333333, "calibration/unique_confidences": 302.0, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00538194444444442, "completions/max_length": 4071.8, "completions/max_terminated_length": 4071.8, "completions/mean_length": 1868.3620849609374, "completions/mean_terminated_length": 1878.50234375, "completions/min_length": 0.0, "completions/min_terminated_length": 730.4, "epoch": 2.4151948100648744, "grad_norm": 0.0023259855806827545, "learning_rate": 2.103365384615385e-07, "loss": -0.0129, "num_tokens": 3069731321.0, "reward": 1.052335834503174, "reward_std": 0.09243645370006562, "rewards/accuracy_reward": 0.7889756917953491, "rewards/brier_reward": 0.8465770006179809, "rewards/confidence_uniqueness_reward": 0.9425639510154724, "rewards/format_reward": 0.9947048664093018, "rewards/frontier_coverage_0": -0.0034056782722473146, "rewards/frontier_coverage_1": -0.003370976075530052, "rewards/frontier_coverage_10": 0.09778977483510971, "rewards/frontier_coverage_15": 0.19524939060211183, "rewards/frontier_coverage_20": 0.31109591722488406, "rewards/frontier_coverage_25": 0.390661233663559, "rewards/frontier_coverage_5": 0.003890213742852211, "rewards/frontier_entropy_batch_reward": -0.3260291278362274, "signal/accuracy_reward/centered_abs_mean": 0.1012424036860466, "signal/accuracy_reward/group_std_mean": 0.13545745313167573, "signal/accuracy_reward/group_zero_std_frac": 0.6027777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9566566586494446, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0506212018430233, "signal/advantage_abs_mean": 0.7642920255661011, "signal/advantage_pre_scale_abs_mean": 0.06975356489419937, "signal/advantage_pre_scale_std": 0.12066361606121064, "signal/advantage_std": 0.9825953960418701, "signal/brier_reward/centered_abs_mean": 0.10595771223306656, "signal/brier_reward/group_std_mean": 0.13505614250898362, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20111228227615358, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010595771297812463, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.021636403724551202, "signal/confidence_uniqueness_reward/group_std_mean": 0.03017391674220562, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04097634702920914, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021636404329910875, "signal/format_reward/centered_abs_mean": 0.008251953125, "signal/format_reward/group_std_mean": 0.013600048422813416, "signal/format_reward/group_zero_std_frac": 0.95, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07726817056536675, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0041259765625, "signal/frontier_coverage_0/centered_abs_mean": 0.15206801891326904, "signal/frontier_coverage_0/group_std_mean": 0.19249917864799498, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.04130175411701202, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002174572693184018, "signal/frontier_coverage_1/centered_abs_mean": 0.15204941034317015, "signal/frontier_coverage_1/group_std_mean": 0.19247425198554993, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.04129683375358582, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0021743066143244507, "signal/frontier_coverage_10/centered_abs_mean": 0.07153294831514359, "signal/frontier_coverage_10/group_std_mean": 0.08741232901811599, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.019443374872207642, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010229211766272783, "signal/frontier_coverage_15/centered_abs_mean": 0.10049240291118622, "signal/frontier_coverage_15/group_std_mean": 0.12352342754602433, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.027307916432619095, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014370413497090339, "signal/frontier_coverage_20/centered_abs_mean": 0.13657326996326447, "signal/frontier_coverage_20/group_std_mean": 0.1695472329854965, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03709420412778854, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0019529977347701789, "signal/frontier_coverage_25/centered_abs_mean": 0.16216370463371277, "signal/frontier_coverage_25/group_std_mean": 0.2022677779197693, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04404643550515175, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0023189409635961055, "signal/frontier_coverage_5/centered_abs_mean": 0.12500394135713577, "signal/frontier_coverage_5/group_std_mean": 0.15981516540050505, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03390644751489162, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0017875563353300095, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3108845889568329, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3744558930397034, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5912355601787567, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03108845800161362, "step": 1005 }, { "calibration/aurc": 0.06281418079147369, "calibration/batch_distribution_entropy": 0.9339591481381666, "calibration/batch_entropy_100bins": 0.9388637498613687, "calibration/batch_entropy_10bins": 0.9339591481381666, "calibration/batch_entropy_50bins": 0.9467069144908493, "calibration/batch_uniqueness": 0.946334471147605, "calibration/buffer_distribution_entropy": 0.9648264661719301, "calibration/buffer_entropy_100bins": 0.9816649220461823, "calibration/buffer_entropy_10bins": 0.9648264661719301, "calibration/buffer_entropy_50bins": 0.9787664691781185, "calibration/confidence_entropy": 0.4745061170928304, "calibration/coverage@0%": 0.12934893317023363, "calibration/coverage@1%": 0.255000314806439, "calibration/coverage@10%": 0.7184015285458769, "calibration/coverage@15%": 0.9169174303399737, "calibration/coverage@20%": 0.9853606975082749, "calibration/coverage@25%": 1.0, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.5823624231615304, "calibration/distribution_entropy_10": 0.9339591481381666, "calibration/distribution_entropy_100": 0.9388637498613687, "calibration/ece": 0.18922213956911352, "calibration/mean_confidence": 0.6355573979566707, "calibration/unique_confidence_per_question": 0.7739583333333333, "calibration/unique_confidences": 297.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004166666666666674, "completions/max_length": 4015.2, "completions/max_terminated_length": 4015.2, "completions/mean_length": 1842.25009765625, "completions/mean_terminated_length": 1850.0492431640625, "completions/min_length": 0.0, "completions/min_terminated_length": 612.0, "epoch": 2.4271946600667493, "grad_norm": 0.002243275521323085, "learning_rate": 1.8028846153846153e-07, "loss": -0.0105, "num_tokens": 3094040186.0, "reward": 1.0495539903640747, "reward_std": 0.09262419492006302, "rewards/accuracy_reward": 0.7880208373069764, "rewards/brier_reward": 0.8300612449645997, "rewards/confidence_uniqueness_reward": 0.9446048140525818, "rewards/format_reward": 0.9958333253860474, "rewards/frontier_coverage_0": -0.02462125839665532, "rewards/frontier_coverage_1": -0.02462125839665532, "rewards/frontier_coverage_10": 0.09222666025161744, "rewards/frontier_coverage_15": 0.18372822403907776, "rewards/frontier_coverage_20": 0.2927555561065674, "rewards/frontier_coverage_25": 0.365255606174469, "rewards/frontier_coverage_5": -0.008586358372122049, "rewards/frontier_entropy_batch_reward": -0.3236843585968018, "signal/accuracy_reward/centered_abs_mean": 0.08917100727558136, "signal/accuracy_reward/group_std_mean": 0.12387789636850358, "signal/accuracy_reward/group_zero_std_frac": 0.6277777671813964, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7768993377685547, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04458550363779068, "signal/advantage_abs_mean": 0.7722596883773803, "signal/advantage_pre_scale_abs_mean": 0.06895973756909371, "signal/advantage_pre_scale_std": 0.11722018867731095, "signal/advantage_std": 0.9827241063117981, "signal/brier_reward/centered_abs_mean": 0.10290507674217224, "signal/brier_reward/group_std_mean": 0.1320227026939392, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18089546859264374, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010290507972240449, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02060473933815956, "signal/confidence_uniqueness_reward/group_std_mean": 0.030566220358014106, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03596749491989613, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020604739896953107, "signal/format_reward/centered_abs_mean": 0.0071289062383584675, "signal/format_reward/group_std_mean": 0.013987554050982, "signal/format_reward/group_zero_std_frac": 0.9388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06039946414530277, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0035644531191792337, "signal/frontier_coverage_0/centered_abs_mean": 0.13577041774988174, "signal/frontier_coverage_0/group_std_mean": 0.17625984847545623, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.0340516809374094, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019415169022977351, "signal/frontier_coverage_1/centered_abs_mean": 0.13577041774988174, "signal/frontier_coverage_1/group_std_mean": 0.17625984847545623, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.0340516809374094, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019415169022977351, "signal/frontier_coverage_10/centered_abs_mean": 0.07094871997833252, "signal/frontier_coverage_10/group_std_mean": 0.08687936514616013, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017919499427080154, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010145666543394326, "signal/frontier_coverage_15/centered_abs_mean": 0.10347563177347183, "signal/frontier_coverage_15/group_std_mean": 0.12728520035743712, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026157256960868836, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014797014882788062, "signal/frontier_coverage_20/centered_abs_mean": 0.14272991716861724, "signal/frontier_coverage_20/group_std_mean": 0.17653460204601287, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03607037365436554, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020410379394888876, "signal/frontier_coverage_25/centered_abs_mean": 0.16851746439933776, "signal/frontier_coverage_25/group_std_mean": 0.209286230802536, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.042574727535247804, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024097997695207594, "signal/frontier_coverage_5/centered_abs_mean": 0.10969754308462143, "signal/frontier_coverage_5/group_std_mean": 0.14257647544145585, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02749013453722, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015686748549342156, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3435045719146729, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40446537733078003, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.6084434032440186, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03435045927762985, "step": 1010 }, { "calibration/aurc": 0.09016903645517856, "calibration/batch_distribution_entropy": 0.9466750867360043, "calibration/batch_entropy_100bins": 0.9441836862649339, "calibration/batch_entropy_10bins": 0.9466750867360043, "calibration/batch_entropy_50bins": 0.9534713268522423, "calibration/batch_uniqueness": 0.9486876738949407, "calibration/buffer_distribution_entropy": 0.9640184617127516, "calibration/buffer_entropy_100bins": 0.9813054812950373, "calibration/buffer_entropy_10bins": 0.9640184617127516, "calibration/buffer_entropy_50bins": 0.9783259839610075, "calibration/confidence_entropy": 0.489239218051625, "calibration/coverage@0%": 0.05799001305483028, "calibration/coverage@1%": 0.2185127502175805, "calibration/coverage@10%": 0.5503759355961706, "calibration/coverage@15%": 0.9104431278735434, "calibration/coverage@20%": 0.9597774527360464, "calibration/coverage@25%": 0.9775456919060052, "calibration/coverage@30%": 0.9968668407310706, "calibration/coverage@5%": 0.46811294604003484, "calibration/distribution_entropy_10": 0.9466750867360043, "calibration/distribution_entropy_100": 0.9441836862649339, "calibration/ece": 0.22282431667164934, "calibration/mean_confidence": 0.6071933231554596, "calibration/unique_confidence_per_question": 0.7802083333333333, "calibration/unique_confidences": 299.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009548611111111095, "completions/max_length": 4002.0, "completions/max_terminated_length": 4002.0, "completions/mean_length": 1828.344921875, "completions/mean_terminated_length": 1846.11103515625, "completions/min_length": 123.0, "completions/min_terminated_length": 656.0, "epoch": 2.4391945100686243, "grad_norm": 0.002343389904126525, "learning_rate": 1.5024038461538462e-07, "loss": -0.0218, "num_tokens": 3118195487.0, "reward": 1.0343999147415162, "reward_std": 0.101786407828331, "rewards/accuracy_reward": 0.7589409828186036, "rewards/brier_reward": 0.8267424464225769, "rewards/confidence_uniqueness_reward": 0.940627145767212, "rewards/format_reward": 0.9904513955116272, "rewards/frontier_coverage_0": -0.005277461744844914, "rewards/frontier_coverage_1": -0.005277461744844914, "rewards/frontier_coverage_10": 0.08828879594802856, "rewards/frontier_coverage_15": 0.17405935227870942, "rewards/frontier_coverage_20": 0.27904576659202573, "rewards/frontier_coverage_25": 0.34634585976600646, "rewards/frontier_coverage_5": 0.0002036154270172119, "rewards/frontier_entropy_batch_reward": -0.2957990825176239, "signal/accuracy_reward/centered_abs_mean": 0.09822591096162796, "signal/accuracy_reward/group_std_mean": 0.13754905611276627, "signal/accuracy_reward/group_zero_std_frac": 0.5722222268581391, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.821112871170044, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04911295548081398, "signal/advantage_abs_mean": 0.7582108736038208, "signal/advantage_pre_scale_abs_mean": 0.07610100358724595, "signal/advantage_pre_scale_std": 0.13187426030635835, "signal/advantage_std": 0.9827902913093567, "signal/brier_reward/centered_abs_mean": 0.10396721214056015, "signal/brier_reward/group_std_mean": 0.1354757845401764, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17589681446552277, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010396721586585046, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025327697582542896, "signal/confidence_uniqueness_reward/group_std_mean": 0.03629572652280331, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.042280444875359535, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002532769786193967, "signal/format_reward/centered_abs_mean": 0.01250000037252903, "signal/format_reward/group_std_mean": 0.020553291589021683, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10179329812526702, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006250000186264515, "signal/frontier_coverage_0/centered_abs_mean": 0.1361816868185997, "signal/frontier_coverage_0/group_std_mean": 0.18098072111606597, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03291482552886009, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019473981345072388, "signal/frontier_coverage_1/centered_abs_mean": 0.1361816868185997, "signal/frontier_coverage_1/group_std_mean": 0.18098072111606597, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03291482552886009, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019473981345072388, "signal/frontier_coverage_10/centered_abs_mean": 0.06957028657197953, "signal/frontier_coverage_10/group_std_mean": 0.08603480309247971, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.016844875738024713, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.000994855083990842, "signal/frontier_coverage_15/centered_abs_mean": 0.10170601159334183, "signal/frontier_coverage_15/group_std_mean": 0.125798237323761, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02465054877102375, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014543959870934487, "signal/frontier_coverage_20/centered_abs_mean": 0.14266515970230104, "signal/frontier_coverage_20/group_std_mean": 0.17721356749534606, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034593602642416954, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002040111809037626, "signal/frontier_coverage_25/centered_abs_mean": 0.16853521764278412, "signal/frontier_coverage_25/group_std_mean": 0.20990538001060485, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04088501185178757, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002410053554922342, "signal/frontier_coverage_5/centered_abs_mean": 0.11141620129346848, "signal/frontier_coverage_5/group_std_mean": 0.14961472749710084, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02694331631064415, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015932516660541296, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.326166045665741, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3929603099822998, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5524879813194274, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03261660411953926, "step": 1015 }, { "calibration/aurc": 0.07510081039220799, "calibration/batch_distribution_entropy": 0.9426630485683243, "calibration/batch_entropy_100bins": 0.9435449820138105, "calibration/batch_entropy_10bins": 0.9426630485683243, "calibration/batch_entropy_50bins": 0.95117591907094, "calibration/batch_uniqueness": 0.9488083222883464, "calibration/buffer_distribution_entropy": 0.9647369329369576, "calibration/buffer_entropy_100bins": 0.9816932900847481, "calibration/buffer_entropy_10bins": 0.9647369329369576, "calibration/buffer_entropy_50bins": 0.9787712595165315, "calibration/confidence_entropy": 0.4916390496712138, "calibration/coverage@0%": 0.28742112536503966, "calibration/coverage@1%": 0.3936729031554595, "calibration/coverage@10%": 0.5427528966131907, "calibration/coverage@15%": 0.9198028074866311, "calibration/coverage@20%": 0.9691510695187165, "calibration/coverage@25%": 0.9930481283422459, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.4725573752228164, "calibration/distribution_entropy_10": 0.9426630485683243, "calibration/distribution_entropy_100": 0.9435449820138105, "calibration/ece": 0.23388155342734285, "calibration/mean_confidence": 0.6157962873395249, "calibration/unique_confidence_per_question": 0.7807291666666666, "calibration/unique_confidences": 299.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.008506944444444465, "completions/max_length": 4062.4, "completions/max_terminated_length": 4062.4, "completions/mean_length": 1823.9856689453125, "completions/mean_terminated_length": 1839.737255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 643.0, "epoch": 2.4511943600704993, "grad_norm": 0.0023279606830328703, "learning_rate": 1.201923076923077e-07, "loss": -0.0238, "num_tokens": 3142309274.0, "reward": 1.0500015020370483, "reward_std": 0.10346215963363647, "rewards/accuracy_reward": 0.7928819537162781, "rewards/brier_reward": 0.821394145488739, "rewards/confidence_uniqueness_reward": 0.941378140449524, "rewards/format_reward": 0.9916666746139526, "rewards/frontier_coverage_0": -0.03479338986799121, "rewards/frontier_coverage_1": -0.03479338986799121, "rewards/frontier_coverage_10": 0.08953844606876374, "rewards/frontier_coverage_15": 0.18047432899475097, "rewards/frontier_coverage_20": 0.29160410165786743, "rewards/frontier_coverage_25": 0.36005922555923464, "rewards/frontier_coverage_5": -0.021287964098155497, "rewards/frontier_entropy_batch_reward": -0.3043049812316895, "signal/accuracy_reward/centered_abs_mean": 0.11208767294883729, "signal/accuracy_reward/group_std_mean": 0.14802851974964143, "signal/accuracy_reward/group_zero_std_frac": 0.5722222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9433816790580749, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05604383647441864, "signal/advantage_abs_mean": 0.7772905588150024, "signal/advantage_pre_scale_abs_mean": 0.07995961681008339, "signal/advantage_pre_scale_std": 0.1342957466840744, "signal/advantage_std": 0.9827840328216553, "signal/brier_reward/centered_abs_mean": 0.11277692764997482, "signal/brier_reward/group_std_mean": 0.14259937554597854, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19096179604530333, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011277692764997483, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.025512998178601264, "signal/confidence_uniqueness_reward/group_std_mean": 0.035593613237142566, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.043344457447528836, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002551299869082868, "signal/format_reward/centered_abs_mean": 0.012912326585501432, "signal/format_reward/group_std_mean": 0.02003680355846882, "signal/format_reward/group_zero_std_frac": 0.9305555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10890447869896888, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006456163292750716, "signal/frontier_coverage_0/centered_abs_mean": 0.1563192069530487, "signal/frontier_coverage_0/group_std_mean": 0.1986013501882553, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03789141923189163, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002235364611260593, "signal/frontier_coverage_1/centered_abs_mean": 0.1563192069530487, "signal/frontier_coverage_1/group_std_mean": 0.1986013501882553, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03789141923189163, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002235364611260593, "signal/frontier_coverage_10/centered_abs_mean": 0.07052088975906372, "signal/frontier_coverage_10/group_std_mean": 0.08744210004806519, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017230145074427126, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.001008448691572994, "signal/frontier_coverage_15/centered_abs_mean": 0.10222131013870239, "signal/frontier_coverage_15/group_std_mean": 0.12673643082380295, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02497452460229397, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014617647510021925, "signal/frontier_coverage_20/centered_abs_mean": 0.14299859404563903, "signal/frontier_coverage_20/group_std_mean": 0.1779682904481888, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.034896204993128774, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.00204487987793982, "signal/frontier_coverage_25/centered_abs_mean": 0.16885097622871398, "signal/frontier_coverage_25/group_std_mean": 0.210318660736084, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04115983694791794, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002414568979293108, "signal/frontier_coverage_5/centered_abs_mean": 0.1253449410200119, "signal/frontier_coverage_5/group_std_mean": 0.16036904454231263, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.03035188913345337, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.00179243260063231, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3257643938064575, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3911713778972626, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5545288920402527, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0325764387845993, "step": 1020 }, { "calibration/aurc": 0.10795433323355885, "calibration/batch_distribution_entropy": 0.954471539417737, "calibration/batch_entropy_100bins": 0.9521645413868827, "calibration/batch_entropy_10bins": 0.954471539417737, "calibration/batch_entropy_50bins": 0.9614803987982079, "calibration/batch_uniqueness": 0.9506998628756316, "calibration/buffer_distribution_entropy": 0.9642103525562659, "calibration/buffer_entropy_100bins": 0.9814568581448638, "calibration/buffer_entropy_10bins": 0.9642103525562659, "calibration/buffer_entropy_50bins": 0.9784806514041853, "calibration/confidence_entropy": 0.4912213062999834, "calibration/coverage@0%": 0.1714014889154245, "calibration/coverage@1%": 0.177656942667606, "calibration/coverage@10%": 0.5418318083204239, "calibration/coverage@15%": 0.5912203710796203, "calibration/coverage@20%": 0.9185154155495979, "calibration/coverage@25%": 0.9672922252010723, "calibration/coverage@30%": 0.979088471849866, "calibration/coverage@5%": 0.3355723281353475, "calibration/distribution_entropy_10": 0.954471539417737, "calibration/distribution_entropy_100": 0.9521645413868827, "calibration/ece": 0.2106986534151861, "calibration/mean_confidence": 0.5992449490097288, "calibration/unique_confidence_per_question": 0.7973958333333333, "calibration/unique_confidences": 306.2, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004947916666666674, "completions/max_length": 4053.6, "completions/max_terminated_length": 4053.6, "completions/mean_length": 1831.3671875, "completions/mean_terminated_length": 1840.537353515625, "completions/min_length": 0.0, "completions/min_terminated_length": 570.8, "epoch": 2.4631942100723743, "grad_norm": 0.0021897871047258377, "learning_rate": 9.014423076923076e-08, "loss": -0.0114, "num_tokens": 3166493728.0, "reward": 1.0358627796173097, "reward_std": 0.0949734017252922, "rewards/accuracy_reward": 0.7576388955116272, "rewards/brier_reward": 0.8228162169456482, "rewards/confidence_uniqueness_reward": 0.9453029632568359, "rewards/format_reward": 0.9950520873069764, "rewards/frontier_coverage_0": -0.010437600314617157, "rewards/frontier_coverage_1": -0.010437600314617157, "rewards/frontier_coverage_10": 0.08798636645078659, "rewards/frontier_coverage_15": 0.17348833680152892, "rewards/frontier_coverage_20": 0.2779244124889374, "rewards/frontier_coverage_25": 0.33984118700027466, "rewards/frontier_coverage_5": -0.0003456904669292271, "rewards/frontier_entropy_batch_reward": -0.29564343094825746, "signal/accuracy_reward/centered_abs_mean": 0.09862196296453477, "signal/accuracy_reward/group_std_mean": 0.13283505588769912, "signal/accuracy_reward/group_zero_std_frac": 0.6138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8716125965118409, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04931098148226738, "signal/advantage_abs_mean": 0.768799102306366, "signal/advantage_pre_scale_abs_mean": 0.07138014510273934, "signal/advantage_pre_scale_std": 0.12297854572534561, "signal/advantage_std": 0.982682466506958, "signal/brier_reward/centered_abs_mean": 0.10478228777647018, "signal/brier_reward/group_std_mean": 0.1331074982881546, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1881021410226822, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010478229075670243, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02070125788450241, "signal/confidence_uniqueness_reward/group_std_mean": 0.030240644142031668, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03780189417302608, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020701258908957243, "signal/format_reward/centered_abs_mean": 0.00837131068110466, "signal/format_reward/group_std_mean": 0.014943964406847953, "signal/format_reward/group_zero_std_frac": 0.9416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07720507308840752, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00418565534055233, "signal/frontier_coverage_0/centered_abs_mean": 0.14610558599233628, "signal/frontier_coverage_0/group_std_mean": 0.18713459372520447, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03730213642120361, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0020893098786473273, "signal/frontier_coverage_1/centered_abs_mean": 0.14610558599233628, "signal/frontier_coverage_1/group_std_mean": 0.18713459372520447, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03730213642120361, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0020893098786473273, "signal/frontier_coverage_10/centered_abs_mean": 0.06919336915016175, "signal/frontier_coverage_10/group_std_mean": 0.0849489226937294, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.017900803312659263, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0009894651593640447, "signal/frontier_coverage_15/centered_abs_mean": 0.09901081472635269, "signal/frontier_coverage_15/group_std_mean": 0.12232684940099717, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02565161548554897, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014158546226099133, "signal/frontier_coverage_20/centered_abs_mean": 0.13749481439590455, "signal/frontier_coverage_20/group_std_mean": 0.17131005227565765, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03556998260319233, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.001966175786219537, "signal/frontier_coverage_25/centered_abs_mean": 0.16092342138290405, "signal/frontier_coverage_25/group_std_mean": 0.2009727656841278, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04158979952335358, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002301204949617386, "signal/frontier_coverage_5/centered_abs_mean": 0.1154853418469429, "signal/frontier_coverage_5/group_std_mean": 0.14849618971347808, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.029463668912649156, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016514403512701392, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31661018133163454, "signal/frontier_entropy_batch_reward/group_std_mean": 0.38219080567359925, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5740918219089508, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03166101947426796, "step": 1025 }, { "calibration/aurc": 0.12602570302866045, "calibration/batch_distribution_entropy": 0.9447044630714128, "calibration/batch_entropy_100bins": 0.9450190257070968, "calibration/batch_entropy_10bins": 0.9447044630714128, "calibration/batch_entropy_50bins": 0.9548580318251819, "calibration/batch_uniqueness": 0.9488180854166666, "calibration/buffer_distribution_entropy": 0.9654555801025856, "calibration/buffer_entropy_100bins": 0.982137694828989, "calibration/buffer_entropy_10bins": 0.9654555801025856, "calibration/buffer_entropy_50bins": 0.9792643436738526, "calibration/confidence_entropy": 0.47917161620424187, "calibration/coverage@0%": 0.035741666666666665, "calibration/coverage@1%": 0.13920833333333332, "calibration/coverage@10%": 0.6481708333333333, "calibration/coverage@15%": 0.7228749999999999, "calibration/coverage@20%": 0.8481375, "calibration/coverage@25%": 0.9088416666666668, "calibration/coverage@30%": 0.9526041666666666, "calibration/coverage@5%": 0.29895, "calibration/distribution_entropy_10": 0.9447044630714128, "calibration/distribution_entropy_100": 0.9450190257070968, "calibration/ece": 0.1854570408333333, "calibration/mean_confidence": 0.5931998533333334, "calibration/unique_confidence_per_question": 0.7885416666666667, "calibration/unique_confidences": 302.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.002517361111111116, "completions/max_length": 4025.0, "completions/max_terminated_length": 4025.0, "completions/mean_length": 1796.7112060546874, "completions/mean_terminated_length": 1801.3041748046876, "completions/min_length": 0.0, "completions/min_terminated_length": 648.6, "epoch": 2.4751940600742492, "grad_norm": 0.0023873134050518274, "learning_rate": 6.009615384615386e-08, "loss": -0.0018, "num_tokens": 3190264289.0, "reward": 1.0420259952545166, "reward_std": 0.0918187752366066, "rewards/accuracy_reward": 0.7673611164093017, "rewards/brier_reward": 0.8428851127624511, "rewards/confidence_uniqueness_reward": 0.9457682132720947, "rewards/format_reward": 0.9973958253860473, "rewards/frontier_coverage_0": 0.005533659388311208, "rewards/frontier_coverage_1": 0.005533659388311208, "rewards/frontier_coverage_10": 0.09484143853187561, "rewards/frontier_coverage_15": 0.18530669808387756, "rewards/frontier_coverage_20": 0.29715303182601926, "rewards/frontier_coverage_25": 0.36473381519317627, "rewards/frontier_coverage_5": 0.018825782649219035, "rewards/frontier_entropy_batch_reward": -0.3311637341976166, "signal/accuracy_reward/centered_abs_mean": 0.09390190988779068, "signal/accuracy_reward/group_std_mean": 0.12828511744737625, "signal/accuracy_reward/group_zero_std_frac": 0.6111111044883728, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8142975091934204, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04695095494389534, "signal/advantage_abs_mean": 0.7726901888847351, "signal/advantage_pre_scale_abs_mean": 0.07007159218192101, "signal/advantage_pre_scale_std": 0.11733129620552063, "signal/advantage_std": 0.9827264785766602, "signal/brier_reward/centered_abs_mean": 0.10260952860116959, "signal/brier_reward/group_std_mean": 0.13155785202980042, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17991337776184083, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01026095375418663, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01803735364228487, "signal/confidence_uniqueness_reward/group_std_mean": 0.02397899702191353, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.032087193056941035, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0018037353875115514, "signal/format_reward/centered_abs_mean": 0.004112413222901523, "signal/format_reward/group_std_mean": 0.006730147963389754, "signal/format_reward/group_zero_std_frac": 0.975, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.036464104615151885, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0020562066114507616, "signal/frontier_coverage_0/centered_abs_mean": 0.1377152234315872, "signal/frontier_coverage_0/group_std_mean": 0.17628149390220643, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03444495052099228, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.0019693276146426795, "signal/frontier_coverage_1/centered_abs_mean": 0.1377152234315872, "signal/frontier_coverage_1/group_std_mean": 0.17628149390220643, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03444495052099228, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.0019693276146426795, "signal/frontier_coverage_10/centered_abs_mean": 0.07171624302864074, "signal/frontier_coverage_10/group_std_mean": 0.08833199888467788, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018152038007974623, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010255422326736151, "signal/frontier_coverage_15/centered_abs_mean": 0.10491779446601868, "signal/frontier_coverage_15/group_std_mean": 0.1298350602388382, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026540745049715042, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015003244625404477, "signal/frontier_coverage_20/centered_abs_mean": 0.1449584811925888, "signal/frontier_coverage_20/group_std_mean": 0.18116262555122375, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03661221191287041, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020729063078761103, "signal/frontier_coverage_25/centered_abs_mean": 0.16970057189464569, "signal/frontier_coverage_25/group_std_mean": 0.21266899406909942, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04281158521771431, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024267181288450956, "signal/frontier_coverage_5/centered_abs_mean": 0.10611386597156525, "signal/frontier_coverage_5/group_std_mean": 0.13786848783493041, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026468663662672042, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0015174282249063253, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3376332998275757, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3988232672214508, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5964804947376251, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03376332968473435, "step": 1030 }, { "calibration/aurc": 0.07552295319038503, "calibration/batch_distribution_entropy": 0.9133538504038856, "calibration/batch_entropy_100bins": 0.9286687311084607, "calibration/batch_entropy_10bins": 0.9133538504038856, "calibration/batch_entropy_50bins": 0.9350509932222222, "calibration/batch_uniqueness": 0.9436407235880271, "calibration/buffer_distribution_entropy": 0.9652011082369188, "calibration/buffer_entropy_100bins": 0.9820486659725232, "calibration/buffer_entropy_10bins": 0.9652011082369188, "calibration/buffer_entropy_50bins": 0.9791281607291051, "calibration/confidence_entropy": 0.4885732382130474, "calibration/coverage@0%": 0.05645285087719297, "calibration/coverage@1%": 0.187702850877193, "calibration/coverage@10%": 0.6769682017543859, "calibration/coverage@15%": 0.9291940789473685, "calibration/coverage@20%": 0.9579934210526316, "calibration/coverage@25%": 0.9836842105263159, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.558108552631579, "calibration/distribution_entropy_10": 0.9133538504038856, "calibration/distribution_entropy_100": 0.9286687311084607, "calibration/ece": 0.18881821655701753, "calibration/mean_confidence": 0.6570069150219299, "calibration/unique_confidence_per_question": 0.7671874999999999, "calibration/unique_confidences": 294.6, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003472222222222232, "completions/max_length": 4008.2, "completions/max_terminated_length": 4008.2, "completions/mean_length": 1852.3521728515625, "completions/mean_terminated_length": 1858.8249755859374, "completions/min_length": 0.0, "completions/min_terminated_length": 582.2, "epoch": 2.4871939100761242, "grad_norm": 0.002268389565870166, "learning_rate": 3.004807692307693e-08, "loss": -0.0093, "num_tokens": 3214658554.0, "reward": 1.054084062576294, "reward_std": 0.09638428539037705, "rewards/accuracy_reward": 0.7923611044883728, "rewards/brier_reward": 0.8475759506225586, "rewards/confidence_uniqueness_reward": 0.9440353751182556, "rewards/format_reward": 0.9964409828186035, "rewards/frontier_coverage_0": -0.008416316658258437, "rewards/frontier_coverage_1": -0.008416316658258437, "rewards/frontier_coverage_10": 0.10146128982305527, "rewards/frontier_coverage_15": 0.19958109259605408, "rewards/frontier_coverage_20": 0.31772214770317075, "rewards/frontier_coverage_25": 0.390204930305481, "rewards/frontier_coverage_5": -0.0028042953927069902, "rewards/frontier_entropy_batch_reward": -0.3362554252147675, "signal/accuracy_reward/centered_abs_mean": 0.09750434011220932, "signal/accuracy_reward/group_std_mean": 0.13808932304382324, "signal/accuracy_reward/group_zero_std_frac": 0.5722222208976746, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8525140166282654, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.04875217005610466, "signal/advantage_abs_mean": 0.7589220523834228, "signal/advantage_pre_scale_abs_mean": 0.07075085788965225, "signal/advantage_pre_scale_std": 0.12341239303350449, "signal/advantage_std": 0.9827309489250183, "signal/brier_reward/centered_abs_mean": 0.09787903875112533, "signal/brier_reward/group_std_mean": 0.12877348363399505, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1719258725643158, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.009787904098629951, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.020282436907291413, "signal/confidence_uniqueness_reward/group_std_mean": 0.02916300855576992, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03589392341673374, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0020282438024878504, "signal/format_reward/centered_abs_mean": 0.006396484514698386, "signal/format_reward/group_std_mean": 0.012048396095633507, "signal/format_reward/group_zero_std_frac": 0.9527777671813965, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.056607330590486525, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003198242257349193, "signal/frontier_coverage_0/centered_abs_mean": 0.13380325138568877, "signal/frontier_coverage_0/group_std_mean": 0.1769975781440735, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.033473866805434224, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.001913386443629861, "signal/frontier_coverage_1/centered_abs_mean": 0.13380325138568877, "signal/frontier_coverage_1/group_std_mean": 0.1769975781440735, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.033473866805434224, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.001913386443629861, "signal/frontier_coverage_10/centered_abs_mean": 0.07177521139383317, "signal/frontier_coverage_10/group_std_mean": 0.08854316622018814, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.018115800246596335, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010263855452649294, "signal/frontier_coverage_15/centered_abs_mean": 0.10529633611440659, "signal/frontier_coverage_15/group_std_mean": 0.13048094213008882, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.026583028957247734, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0015057375887408853, "signal/frontier_coverage_20/centered_abs_mean": 0.14539185762405396, "signal/frontier_coverage_20/group_std_mean": 0.1816335141658783, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.03668516799807549, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.002079103607684374, "signal/frontier_coverage_25/centered_abs_mean": 0.17040335834026338, "signal/frontier_coverage_25/group_std_mean": 0.21383727490901946, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04297814220190048, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.0024367680307477713, "signal/frontier_coverage_5/centered_abs_mean": 0.10470791161060333, "signal/frontier_coverage_5/group_std_mean": 0.13967494815587997, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.026209026202559473, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0014973230892792345, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33285818696022035, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3948968529701233, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.586955726146698, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033285819739103314, "step": 1035 }, { "calibration/aurc": 0.03982850277120601, "calibration/batch_distribution_entropy": 0.9302262732060675, "calibration/batch_entropy_100bins": 0.9370089211715671, "calibration/batch_entropy_10bins": 0.9302262732060675, "calibration/batch_entropy_50bins": 0.9452935932980246, "calibration/batch_uniqueness": 0.9455695415080067, "calibration/buffer_distribution_entropy": 0.9635296927704001, "calibration/buffer_entropy_100bins": 0.9812695589033449, "calibration/buffer_entropy_10bins": 0.9635296927704001, "calibration/buffer_entropy_50bins": 0.9781823252528057, "calibration/confidence_entropy": 0.48144740059044216, "calibration/coverage@0%": 0.2527846128608924, "calibration/coverage@1%": 0.3907275262467192, "calibration/coverage@10%": 0.8651861876640421, "calibration/coverage@15%": 0.91529281496063, "calibration/coverage@20%": 0.9555077099737532, "calibration/coverage@25%": 0.9942503280839894, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.779650590551181, "calibration/distribution_entropy_10": 0.9302262732060675, "calibration/distribution_entropy_100": 0.9370089211715671, "calibration/ece": 0.1816495291994751, "calibration/mean_confidence": 0.640207217027559, "calibration/unique_confidence_per_question": 0.778125, "calibration/unique_confidences": 298.8, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0052951388888889065, "completions/max_length": 3966.4, "completions/max_terminated_length": 3966.4, "completions/mean_length": 1817.649072265625, "completions/mean_terminated_length": 1827.4730712890625, "completions/min_length": 0.0, "completions/min_terminated_length": 563.0, "epoch": 2.499193760077999, "grad_norm": 0.002182691590860486, "learning_rate": 0.0, "loss": -0.005, "num_tokens": 3238689743.0, "reward": 1.0494820594787597, "reward_std": 0.09593940526247025, "rewards/accuracy_reward": 0.7894965171813965, "rewards/brier_reward": 0.8297081470489502, "rewards/confidence_uniqueness_reward": 0.9430023312568665, "rewards/format_reward": 0.9947048664093018, "rewards/frontier_coverage_0": -0.026781286112964153, "rewards/frontier_coverage_1": -0.026781286112964153, "rewards/frontier_coverage_10": 0.09526022970676422, "rewards/frontier_coverage_15": 0.1908439964056015, "rewards/frontier_coverage_20": 0.3055948555469513, "rewards/frontier_coverage_25": 0.36994290351867676, "rewards/frontier_coverage_5": -0.010583885153755546, "rewards/frontier_entropy_batch_reward": -0.32723876237869265, "signal/accuracy_reward/centered_abs_mean": 0.10203450620174408, "signal/accuracy_reward/group_std_mean": 0.14063651114702225, "signal/accuracy_reward/group_zero_std_frac": 0.575000011920929, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8835547685623169, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05101725310087204, "signal/advantage_abs_mean": 0.7627329468727112, "signal/advantage_pre_scale_abs_mean": 0.0720044381916523, "signal/advantage_pre_scale_std": 0.12101912498474121, "signal/advantage_std": 0.9827530026435852, "signal/brier_reward/centered_abs_mean": 0.10083793252706527, "signal/brier_reward/group_std_mean": 0.12944591790437698, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17524527311325072, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010083793476223946, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.019239266216754914, "signal/confidence_uniqueness_reward/group_std_mean": 0.027547023445367812, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03336756303906441, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019239266403019428, "signal/format_reward/centered_abs_mean": 0.0053222656948491934, "signal/format_reward/group_std_mean": 0.010332421585917472, "signal/format_reward/group_zero_std_frac": 0.9555555582046509, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.045553749427199364, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0026611328474245967, "signal/frontier_coverage_0/centered_abs_mean": 0.14174142628908157, "signal/frontier_coverage_0/group_std_mean": 0.1843595564365387, "signal/frontier_coverage_0/group_zero_std_frac": 0.0, "signal/frontier_coverage_0/scaled_weighted_centered_abs_mean": 0.03516431301832199, "signal/frontier_coverage_0/weight": 0.014299999922513962, "signal/frontier_coverage_0/weighted_centered_abs_mean": 0.002026902325451374, "signal/frontier_coverage_1/centered_abs_mean": 0.14174142628908157, "signal/frontier_coverage_1/group_std_mean": 0.1843595564365387, "signal/frontier_coverage_1/group_zero_std_frac": 0.0, "signal/frontier_coverage_1/scaled_weighted_centered_abs_mean": 0.03516431301832199, "signal/frontier_coverage_1/weight": 0.014299999922513962, "signal/frontier_coverage_1/weighted_centered_abs_mean": 0.002026902325451374, "signal/frontier_coverage_10/centered_abs_mean": 0.07119416147470474, "signal/frontier_coverage_10/group_std_mean": 0.0873028039932251, "signal/frontier_coverage_10/group_zero_std_frac": 0.0, "signal/frontier_coverage_10/scaled_weighted_centered_abs_mean": 0.01772012934088707, "signal/frontier_coverage_10/weight": 0.014299999922513962, "signal/frontier_coverage_10/weighted_centered_abs_mean": 0.0010180765297263862, "signal/frontier_coverage_15/centered_abs_mean": 0.10372219681739807, "signal/frontier_coverage_15/group_std_mean": 0.1279784396290779, "signal/frontier_coverage_15/group_zero_std_frac": 0.0, "signal/frontier_coverage_15/scaled_weighted_centered_abs_mean": 0.02581692300736904, "signal/frontier_coverage_15/weight": 0.014299999922513962, "signal/frontier_coverage_15/weighted_centered_abs_mean": 0.0014832273591309786, "signal/frontier_coverage_20/centered_abs_mean": 0.14510032236576081, "signal/frontier_coverage_20/group_std_mean": 0.1804075062274933, "signal/frontier_coverage_20/group_zero_std_frac": 0.0, "signal/frontier_coverage_20/scaled_weighted_centered_abs_mean": 0.036104710400104524, "signal/frontier_coverage_20/weight": 0.014299999922513962, "signal/frontier_coverage_20/weighted_centered_abs_mean": 0.0020749345421791077, "signal/frontier_coverage_25/centered_abs_mean": 0.1685394436120987, "signal/frontier_coverage_25/group_std_mean": 0.21030722558498383, "signal/frontier_coverage_25/group_zero_std_frac": 0.0, "signal/frontier_coverage_25/scaled_weighted_centered_abs_mean": 0.04193039983510971, "signal/frontier_coverage_25/weight": 0.014299999922513962, "signal/frontier_coverage_25/weighted_centered_abs_mean": 0.002410113997757435, "signal/frontier_coverage_5/centered_abs_mean": 0.11676243543624878, "signal/frontier_coverage_5/group_std_mean": 0.15255253612995148, "signal/frontier_coverage_5/group_zero_std_frac": 0.0, "signal/frontier_coverage_5/scaled_weighted_centered_abs_mean": 0.02898463122546673, "signal/frontier_coverage_5/weight": 0.014299999922513962, "signal/frontier_coverage_5/weighted_centered_abs_mean": 0.0016697028186172247, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3297681212425232, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39378581047058103, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5740349888801575, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03297681398689747, "step": 1040 }, { "epoch": 2.499193760077999, "step": 1040, "total_flos": 0.0, "train_loss": -0.0007628972309668405, "train_runtime": 27173.5486, "train_samples_per_second": 2.76, "train_steps_per_second": 0.038 } ], "logging_steps": 5, "max_steps": 1040, "num_input_tokens_seen": 3238689743, "num_train_epochs": 3, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }