{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.49919376007799904, "eval_steps": 50, "global_step": 208, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "calibration/aurc": 0.5113611524759551, "calibration/batch_distribution_entropy": 0.2727310928788026, "calibration/confidence_entropy": 0.21980770767433938, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.4699071914305725, "calibration/mean_confidence": 0.9158092758403239, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01944444444444444, "completions/max_length": 4048.4, "completions/max_terminated_length": 4048.4, "completions/mean_length": 512.9665771484375, "completions/mean_terminated_length": 523.1480102539062, "completions/min_length": 0.0, "completions/min_terminated_length": 2.0, "epoch": 0.011999850001874977, "grad_norm": 0.008225883357226849, "learning_rate": 5.952380952380953e-07, "loss": 0.0114, "num_tokens": 9023583.0, "reward": 0.4356141984462738, "reward_std": 0.3870311200618744, "rewards/accuracy_reward": 0.26093749403953553, "rewards/batch_coverage_0": 0.008399064699187875, "rewards/batch_coverage_1": 0.008399064699187875, "rewards/batch_coverage_10": 0.025467225164175034, "rewards/batch_coverage_15": 0.037757834792137145, "rewards/batch_coverage_20": 0.06029712557792664, "rewards/batch_coverage_25": 0.07480189204216003, "rewards/batch_coverage_5": 0.015028292080387473, "rewards/brier_reward": 0.3116580307483673, "rewards/confidence_uniqueness_reward": 0.28452879190444946, "rewards/format_reward": 0.5988715291023254, "rewards/frontier_entropy_batch_reward": -0.5720015645027161, "signal/accuracy_reward/centered_abs_mean": 0.3065212666988373, "signal/accuracy_reward/group_std_mean": 0.36596824526786803, "signal/accuracy_reward/group_zero_std_frac": 0.09444444552063942, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.4219505786895752, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15326063334941864, "signal/advantage_abs_mean": 0.8588703870773315, "signal/advantage_pre_scale_abs_mean": 0.3350170612335205, "signal/advantage_pre_scale_std": 0.39084741473197937, "signal/advantage_std": 0.9841934680938721, "signal/batch_coverage_0/centered_abs_mean": 0.016065572574734687, "signal/batch_coverage_0/group_std_mean": 0.03175957277417183, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0006328160583507269, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.00022973768063820897, "signal/batch_coverage_1/centered_abs_mean": 0.016065572574734687, "signal/batch_coverage_1/group_std_mean": 0.03175957277417183, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0006328160583507269, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.00022973768063820897, "signal/batch_coverage_10/centered_abs_mean": 0.031208885833621025, "signal/batch_coverage_10/group_std_mean": 0.04865182489156723, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.00122649057302624, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.00044628706527873876, "signal/batch_coverage_15/centered_abs_mean": 0.045175787433981894, "signal/batch_coverage_15/group_std_mean": 0.064193245023489, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0017749476246535778, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0006460137956310064, "signal/batch_coverage_20/centered_abs_mean": 0.0776137426495552, "signal/batch_coverage_20/group_std_mean": 0.10128775835037232, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0030479800887405874, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.00110987649532035, "signal/batch_coverage_25/centered_abs_mean": 0.10338385552167892, "signal/batch_coverage_25/group_std_mean": 0.13086765259504318, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.00406456789933145, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0014783891616389155, "signal/batch_coverage_5/centered_abs_mean": 0.020831802859902382, "signal/batch_coverage_5/group_std_mean": 0.03733638674020767, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0008195297850761563, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00029789476830046626, "signal/brier_reward/centered_abs_mean": 0.3176029086112976, "signal/brier_reward/group_std_mean": 0.37121009826660156, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.08744428902864457, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.03176029026508331, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.23409832119941712, "signal/confidence_uniqueness_reward/group_std_mean": 0.28709676265716555, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06445259153842926, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.023409833386540412, "signal/format_reward/centered_abs_mean": 0.43926323652267457, "signal/format_reward/group_std_mean": 0.47427822947502135, "signal/format_reward/group_zero_std_frac": 0.0, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.6049425005912781, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.21963161826133729, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.4491168737411499, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4817074775695801, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.12370291203260422, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.04491168931126595, "step": 5 }, { "calibration/aurc": 0.516741588823447, "calibration/batch_distribution_entropy": 0.2900443053110763, "calibration/confidence_entropy": 0.22429980386921775, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.46433237115415676, "calibration/mean_confidence": 0.9163211270490192, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01562500000000002, "completions/max_length": 3979.0, "completions/max_terminated_length": 3979.0, "completions/mean_length": 470.5865478515625, "completions/mean_terminated_length": 478.19724731445314, "completions/min_length": 0.0, "completions/min_terminated_length": 21.6, "epoch": 0.023999700003749954, "grad_norm": 0.006776896305382252, "learning_rate": 1.1904761904761906e-06, "loss": 0.012, "num_tokens": 17527460.0, "reward": 0.513310170173645, "reward_std": 0.3529686212539673, "rewards/accuracy_reward": 0.2857638895511627, "rewards/batch_coverage_0": 0.00610975744202733, "rewards/batch_coverage_1": 0.00610975744202733, "rewards/batch_coverage_10": 0.02148380195721984, "rewards/batch_coverage_15": 0.027421478927135468, "rewards/batch_coverage_20": 0.040733900666236875, "rewards/batch_coverage_25": 0.0546408973634243, "rewards/batch_coverage_5": 0.008622795436531305, "rewards/brier_reward": 0.3533738672733307, "rewards/confidence_uniqueness_reward": 0.364860337972641, "rewards/format_reward": 0.7322916746139526, "rewards/frontier_entropy_batch_reward": -0.6990230441093445, "signal/accuracy_reward/centered_abs_mean": 0.31860893964767456, "signal/accuracy_reward/group_std_mean": 0.38112024068832395, "signal/accuracy_reward/group_zero_std_frac": 0.055555556900799274, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.48522502183914185, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.15930446982383728, "signal/advantage_abs_mean": 0.8068342566490173, "signal/advantage_pre_scale_abs_mean": 0.2906586229801178, "signal/advantage_pre_scale_std": 0.35583515763282775, "signal/advantage_std": 0.984165096282959, "signal/batch_coverage_0/centered_abs_mean": 0.015117424167692661, "signal/batch_coverage_0/group_std_mean": 0.03145041689276695, "signal/batch_coverage_0/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0006615896127186716, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.00021617915772367268, "signal/batch_coverage_1/centered_abs_mean": 0.015117424167692661, "signal/batch_coverage_1/group_std_mean": 0.03145041689276695, "signal/batch_coverage_1/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0006615896127186716, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.00021617915772367268, "signal/batch_coverage_10/centered_abs_mean": 0.022562607005238534, "signal/batch_coverage_10/group_std_mean": 0.04156057015061378, "signal/batch_coverage_10/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0009827240835875272, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.00032264526817016304, "signal/batch_coverage_15/centered_abs_mean": 0.027966295555233955, "signal/batch_coverage_15/group_std_mean": 0.04787140339612961, "signal/batch_coverage_15/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0012141478480771184, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0003999180218670517, "signal/batch_coverage_20/centered_abs_mean": 0.04405505768954754, "signal/batch_coverage_20/group_std_mean": 0.06593646556138992, "signal/batch_coverage_20/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.001893820520490408, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0006299873115494847, "signal/batch_coverage_25/centered_abs_mean": 0.06627024859189987, "signal/batch_coverage_25/group_std_mean": 0.09130300730466842, "signal/batch_coverage_25/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0028554079122841357, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0009476645383983851, "signal/batch_coverage_5/centered_abs_mean": 0.015587140060961246, "signal/batch_coverage_5/group_std_mean": 0.032326587662100795, "signal/batch_coverage_5/group_zero_std_frac": 0.00555555559694767, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0006837769004050642, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00022289610642474146, "signal/brier_reward/centered_abs_mean": 0.3139029681682587, "signal/brier_reward/group_std_mean": 0.3684773623943329, "signal/brier_reward/group_zero_std_frac": 0.002777777798473835, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.09546843618154525, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.031390297785401346, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.22378431260585785, "signal/confidence_uniqueness_reward/group_std_mean": 0.2793233871459961, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.06795128434896469, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.022378432005643843, "signal/format_reward/centered_abs_mean": 0.3380750954151154, "signal/format_reward/group_std_mean": 0.40706380009651183, "signal/format_reward/group_zero_std_frac": 0.008333333395421505, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.5067545115947724, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.1690375477075577, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3691741645336151, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43443808555603025, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.11110212206840515, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03691741861402988, "step": 10 }, { "calibration/aurc": 0.5431273408506506, "calibration/batch_distribution_entropy": 0.30753752651198607, "calibration/confidence_entropy": 0.25090009054126317, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.0, "calibration/coverage@25%": 0.0, "calibration/coverage@30%": 0.0, "calibration/coverage@5%": 0.0, "calibration/ece": 0.5192780321324778, "calibration/mean_confidence": 0.9103578212120444, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009635416666666674, "completions/max_length": 3870.8, "completions/max_terminated_length": 3870.8, "completions/mean_length": 402.1271728515625, "completions/mean_terminated_length": 406.04705810546875, "completions/min_length": 0.0, "completions/min_terminated_length": 58.8, "epoch": 0.03599955000562493, "grad_norm": 0.0055563561618328094, "learning_rate": 1.7857142857142859e-06, "loss": -0.0366, "num_tokens": 25261949.0, "reward": 0.6459226131439209, "reward_std": 0.2591663062572479, "rewards/accuracy_reward": 0.31822916865348816, "rewards/batch_coverage_0": 0.010474585834890604, "rewards/batch_coverage_1": 0.010474585834890604, "rewards/batch_coverage_10": 0.025091929733753203, "rewards/batch_coverage_15": 0.033538448810577395, "rewards/batch_coverage_20": 0.04148237034678459, "rewards/batch_coverage_25": 0.055399445444345476, "rewards/batch_coverage_5": 0.01288955081254244, "rewards/brier_reward": 0.4329886555671692, "rewards/confidence_uniqueness_reward": 0.5421028196811676, "rewards/format_reward": 0.9553819417953491, "rewards/frontier_entropy_batch_reward": -0.9109981417655945, "signal/accuracy_reward/centered_abs_mean": 0.3211480021476746, "signal/accuracy_reward/group_std_mean": 0.38029863834381106, "signal/accuracy_reward/group_zero_std_frac": 0.0777777798473835, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7604451894760131, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.1605740010738373, "signal/advantage_abs_mean": 0.7640881419181824, "signal/advantage_pre_scale_abs_mean": 0.2078488826751709, "signal/advantage_pre_scale_std": 0.26615193486213684, "signal/advantage_std": 0.9839967846870422, "signal/batch_coverage_0/centered_abs_mean": 0.02042274661362171, "signal/batch_coverage_0/group_std_mean": 0.03724094405770302, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.001394424750469625, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.00029204529710114, "signal/batch_coverage_1/centered_abs_mean": 0.02042274661362171, "signal/batch_coverage_1/group_std_mean": 0.03724094405770302, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.001394424750469625, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.00029204529710114, "signal/batch_coverage_10/centered_abs_mean": 0.025325803831219674, "signal/batch_coverage_10/group_std_mean": 0.045688331872224805, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0017356230178847908, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0003621589916292578, "signal/batch_coverage_15/centered_abs_mean": 0.030855680257081984, "signal/batch_coverage_15/group_std_mean": 0.05317860469222069, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0020888552302494646, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0004412362352013588, "signal/batch_coverage_20/centered_abs_mean": 0.03871218115091324, "signal/batch_coverage_20/group_std_mean": 0.06303965002298355, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0026147721568122507, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0005535841512028128, "signal/batch_coverage_25/centered_abs_mean": 0.055917789041996, "signal/batch_coverage_25/group_std_mean": 0.0836059644818306, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.003946993872523308, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0007996243657544255, "signal/batch_coverage_5/centered_abs_mean": 0.02079017162322998, "signal/batch_coverage_5/group_std_mean": 0.038049327582120894, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0014218244701623916, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00029729946982115507, "signal/brier_reward/centered_abs_mean": 0.29541789889335635, "signal/brier_reward/group_std_mean": 0.3477012634277344, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.13953412771224977, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.02954179123044014, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.17255294919013978, "signal/confidence_uniqueness_reward/group_std_mean": 0.21755547523498536, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08172692656517029, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.01725529581308365, "signal/format_reward/centered_abs_mean": 0.07890624776482583, "signal/format_reward/group_std_mean": 0.15041253715753555, "signal/format_reward/group_zero_std_frac": 0.3944444447755814, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.17634160071611404, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.03945312388241291, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.15291024446487428, "signal/frontier_entropy_batch_reward/group_std_mean": 0.2610882967710495, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.11666666883975267, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.07050499767065048, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.015291025303304196, "step": 15 }, { "calibration/aurc": 0.45601536234914103, "calibration/batch_distribution_entropy": 0.5371427876707766, "calibration/buffer_distribution_entropy": 0.3484450649060329, "calibration/confidence_entropy": 0.3684050618087809, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0, "calibration/coverage@15%": 0.0, "calibration/coverage@20%": 0.013123359580052493, "calibration/coverage@25%": 0.014173228346456693, "calibration/coverage@30%": 0.09238845144356955, "calibration/coverage@5%": 0.0, "calibration/ece": 0.35893722603768324, "calibration/mean_confidence": 0.8490547748862503, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00972222222222221, "completions/max_length": 3925.0, "completions/max_terminated_length": 3925.0, "completions/mean_length": 440.58714599609374, "completions/mean_terminated_length": 444.94862060546876, "completions/min_length": 0.0, "completions/min_terminated_length": 83.6, "epoch": 0.04799940000749991, "grad_norm": 0.005572207737714052, "learning_rate": 2.380952380952381e-06, "loss": -0.0279, "num_tokens": 33451209.0, "reward": 0.7563788414001464, "reward_std": 0.22359791100025178, "rewards/accuracy_reward": 0.44314236640930177, "rewards/batch_coverage_0": 0.04085179083049297, "rewards/batch_coverage_1": 0.04085179083049297, "rewards/batch_coverage_10": 0.09493901133537293, "rewards/batch_coverage_15": 0.10782753825187683, "rewards/batch_coverage_20": 0.1439335286617279, "rewards/batch_coverage_25": 0.16521188020706176, "rewards/batch_coverage_5": 0.05991588160395622, "rewards/brier_reward": 0.582850182056427, "rewards/confidence_uniqueness_reward": 0.6730621218681335, "rewards/format_reward": 0.9874131798744201, "rewards/frontier_entropy_batch_reward": -0.9383567333221435, "signal/accuracy_reward/centered_abs_mean": 0.2931477904319763, "signal/accuracy_reward/group_std_mean": 0.3582261562347412, "signal/accuracy_reward/group_zero_std_frac": 0.09722222313284874, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.951709794998169, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.14657389521598815, "signal/advantage_abs_mean": 0.7565471768379212, "signal/advantage_pre_scale_abs_mean": 0.17789879739284514, "signal/advantage_pre_scale_std": 0.23252987563610078, "signal/advantage_std": 0.9838254809379577, "signal/batch_coverage_0/centered_abs_mean": 0.040375912189483644, "signal/batch_coverage_0/group_std_mean": 0.06719064265489579, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.003825485659763217, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0005773755547124892, "signal/batch_coverage_1/centered_abs_mean": 0.040375912189483644, "signal/batch_coverage_1/group_std_mean": 0.06719064265489579, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.003825485659763217, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0005773755547124892, "signal/batch_coverage_10/centered_abs_mean": 0.05652825087308884, "signal/batch_coverage_10/group_std_mean": 0.0937819629907608, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.005391606315970421, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0008083539898507297, "signal/batch_coverage_15/centered_abs_mean": 0.06388088911771775, "signal/batch_coverage_15/group_std_mean": 0.10372960716485977, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.00607602009549737, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0009134967462159693, "signal/batch_coverage_20/centered_abs_mean": 0.09250121712684631, "signal/batch_coverage_20/group_std_mean": 0.14216586351394653, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.008676209393888712, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0013227674178779125, "signal/batch_coverage_25/centered_abs_mean": 0.1186644583940506, "signal/batch_coverage_25/group_std_mean": 0.1730334848165512, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.011201648693531751, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.00169690172187984, "signal/batch_coverage_5/centered_abs_mean": 0.04421483613550663, "signal/batch_coverage_5/group_std_mean": 0.073546851426363, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.004192000767216086, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0006322721135802567, "signal/brier_reward/centered_abs_mean": 0.24310141503810884, "signal/brier_reward/group_std_mean": 0.296148020029068, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1566994845867157, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.024310142174363135, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.13678000569343568, "signal/confidence_uniqueness_reward/group_std_mean": 0.16919248104095458, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0862940102815628, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.013678001426160335, "signal/format_reward/centered_abs_mean": 0.02312825545668602, "signal/format_reward/group_std_mean": 0.0512887679040432, "signal/format_reward/group_zero_std_frac": 0.7611111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.07421396747231483, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01156412772834301, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.10914571136236191, "signal/frontier_entropy_batch_reward/group_std_mean": 0.215337872505188, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.23888888955116272, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.0710667297244072, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.010914571583271027, "step": 20 }, { "calibration/aurc": 0.3241197541288493, "calibration/batch_distribution_entropy": 0.7326891440981866, "calibration/buffer_distribution_entropy": 0.4592126091407316, "calibration/confidence_entropy": 0.4994222128178114, "calibration/coverage@0%": 0.0, "calibration/coverage@1%": 0.0, "calibration/coverage@10%": 0.0431303389363461, "calibration/coverage@15%": 0.0862882336731882, "calibration/coverage@20%": 0.1499216513704103, "calibration/coverage@25%": 0.3922831111479822, "calibration/coverage@30%": 0.4921012428647086, "calibration/coverage@5%": 0.0, "calibration/ece": 0.16337910087999727, "calibration/mean_confidence": 0.7518155108340762, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.010503472222222232, "completions/max_length": 3511.6, "completions/max_terminated_length": 3511.6, "completions/mean_length": 522.8118041992187, "completions/mean_terminated_length": 528.3630310058594, "completions/min_length": 0.0, "completions/min_terminated_length": 106.2, "epoch": 0.05999925000937488, "grad_norm": 0.0037775952368974686, "learning_rate": 2.9761904761904763e-06, "loss": -0.0267, "num_tokens": 42598449.0, "reward": 0.8629345178604126, "reward_std": 0.20029426813125611, "rewards/accuracy_reward": 0.5723958373069763, "rewards/batch_coverage_0": 0.08039978817105294, "rewards/batch_coverage_1": 0.08039978817105294, "rewards/batch_coverage_10": 0.17986115962266921, "rewards/batch_coverage_15": 0.1960737258195877, "rewards/batch_coverage_20": 0.21463773548603057, "rewards/batch_coverage_25": 0.23300228416919708, "rewards/batch_coverage_5": 0.12557416558265685, "rewards/brier_reward": 0.7168434143066407, "rewards/confidence_uniqueness_reward": 0.7757420778274536, "rewards/format_reward": 0.9881076335906982, "rewards/frontier_entropy_batch_reward": -0.8244805216789246, "signal/accuracy_reward/centered_abs_mean": 0.2552517354488373, "signal/accuracy_reward/group_std_mean": 0.32020156383514403, "signal/accuracy_reward/group_zero_std_frac": 0.1555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9001852035522461, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.12762586772441864, "signal/advantage_abs_mean": 0.7349266171455383, "signal/advantage_pre_scale_abs_mean": 0.15444318056106568, "signal/advantage_pre_scale_std": 0.21327590942382812, "signal/advantage_std": 0.9837662577629089, "signal/batch_coverage_0/centered_abs_mean": 0.06751533597707748, "signal/batch_coverage_0/group_std_mean": 0.09494247138500214, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.006873792223632336, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0009654692490585149, "signal/batch_coverage_1/centered_abs_mean": 0.06751533597707748, "signal/batch_coverage_1/group_std_mean": 0.09494247138500214, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.006873792223632336, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0009654692490585149, "signal/batch_coverage_10/centered_abs_mean": 0.09968591183423996, "signal/batch_coverage_10/group_std_mean": 0.14351416528224945, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.010167979542165995, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0014255085028707982, "signal/batch_coverage_15/centered_abs_mean": 0.10670627057552337, "signal/batch_coverage_15/group_std_mean": 0.15375309437513351, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.01088944710791111, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0015258996281772852, "signal/batch_coverage_20/centered_abs_mean": 0.12048476487398148, "signal/batch_coverage_20/group_std_mean": 0.1724111407995224, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.012245130911469459, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0017229320714250208, "signal/batch_coverage_25/centered_abs_mean": 0.13844509273767472, "signal/batch_coverage_25/group_std_mean": 0.19501933455467224, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.01405192855745554, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.001979764853604138, "signal/batch_coverage_5/centered_abs_mean": 0.07986659109592438, "signal/batch_coverage_5/group_std_mean": 0.11289454102516175, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.00815016021952033, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0011420922819525003, "signal/brier_reward/centered_abs_mean": 0.1660325288772583, "signal/brier_reward/group_std_mean": 0.21059280633926392, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.11725894808769226, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016603253595530987, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.12287691384553909, "signal/confidence_uniqueness_reward/group_std_mean": 0.14854514896869658, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08736316412687302, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.012287691608071328, "signal/format_reward/centered_abs_mean": 0.02112087681889534, "signal/format_reward/group_std_mean": 0.03975438773632049, "signal/format_reward/group_zero_std_frac": 0.8416666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0743426114320755, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01056043840944767, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.25322854369878767, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3676407068967819, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.07500000298023224, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.18085478246212006, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.025322853401303292, "step": 25 }, { "calibration/aurc": 0.27955515653207896, "calibration/batch_distribution_entropy": 0.9364864588621179, "calibration/buffer_distribution_entropy": 0.5970358735855067, "calibration/confidence_entropy": 0.5301070483821728, "calibration/coverage@0%": 0.009701449275362319, "calibration/coverage@1%": 0.009701449275362319, "calibration/coverage@10%": 0.015085507246376811, "calibration/coverage@15%": 0.06695507246376811, "calibration/coverage@20%": 0.1699693094629156, "calibration/coverage@25%": 0.34508087266527165, "calibration/coverage@30%": 0.711757724689686, "calibration/coverage@5%": 0.009701449275362319, "calibration/ece": 0.17340574561883063, "calibration/mean_confidence": 0.6036285913200597, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014583333333333327, "completions/max_length": 3165.4, "completions/max_terminated_length": 3165.4, "completions/mean_length": 613.1007080078125, "completions/mean_terminated_length": 622.1764770507813, "completions/min_length": 0.0, "completions/min_terminated_length": 155.4, "epoch": 0.07199910001124986, "grad_norm": 0.0026056095957756042, "learning_rate": 3.5714285714285718e-06, "loss": -0.0293, "num_tokens": 52771289.0, "reward": 0.9426739931106567, "reward_std": 0.18256551921367645, "rewards/accuracy_reward": 0.60546875, "rewards/batch_coverage_0": 0.11707074046134949, "rewards/batch_coverage_1": 0.11707074046134949, "rewards/batch_coverage_10": 0.19703109860420226, "rewards/batch_coverage_15": 0.21080294847488404, "rewards/batch_coverage_20": 0.2307416707277298, "rewards/batch_coverage_25": 0.23985148668289186, "rewards/batch_coverage_5": 0.15007564574480056, "rewards/brier_reward": 0.7217867493629455, "rewards/confidence_uniqueness_reward": 0.9331858634948731, "rewards/format_reward": 0.9842013955116272, "rewards/frontier_entropy_batch_reward": -0.3571417719125748, "signal/accuracy_reward/centered_abs_mean": 0.22742512822151184, "signal/accuracy_reward/group_std_mean": 0.286023223400116, "signal/accuracy_reward/group_zero_std_frac": 0.2361111134290695, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8609166383743286, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.11371256411075592, "signal/advantage_abs_mean": 0.7514987826347351, "signal/advantage_pre_scale_abs_mean": 0.1399205207824707, "signal/advantage_pre_scale_std": 0.19905700385570527, "signal/advantage_std": 0.9837118506431579, "signal/batch_coverage_0/centered_abs_mean": 0.17423317432403565, "signal/batch_coverage_0/group_std_mean": 0.22805773317813874, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.01937145460397005, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0024915344547480345, "signal/batch_coverage_1/centered_abs_mean": 0.17423317432403565, "signal/batch_coverage_1/group_std_mean": 0.22805773317813874, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.01937145460397005, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0024915344547480345, "signal/batch_coverage_10/centered_abs_mean": 0.1992947429418564, "signal/batch_coverage_10/group_std_mean": 0.25936625003814695, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.022066234052181243, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.00284991473890841, "signal/batch_coverage_15/centered_abs_mean": 0.201465904712677, "signal/batch_coverage_15/group_std_mean": 0.2615888833999634, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.022288778424263002, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00288096247240901, "signal/batch_coverage_20/centered_abs_mean": 0.21147489845752715, "signal/batch_coverage_20/group_std_mean": 0.2743402898311615, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.02333966102451086, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003024091012775898, "signal/batch_coverage_25/centered_abs_mean": 0.2154662013053894, "signal/batch_coverage_25/group_std_mean": 0.2790635824203491, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.02376740947365761, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0030811666045337917, "signal/batch_coverage_5/centered_abs_mean": 0.18447383344173432, "signal/batch_coverage_5/group_std_mean": 0.24043427407741547, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.020505336113274097, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0026379758259281517, "signal/brier_reward/centered_abs_mean": 0.19326081573963166, "signal/brier_reward/group_std_mean": 0.2406743735074997, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1484305217862129, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01932608112692833, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03934527114033699, "signal/confidence_uniqueness_reward/group_std_mean": 0.06451441496610641, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.029453575611114502, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003934527095407247, "signal/format_reward/centered_abs_mean": 0.02638888843357563, "signal/format_reward/group_std_mean": 0.04935496896505356, "signal/format_reward/group_zero_std_frac": 0.7972222328186035, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0981840431690216, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013194444216787815, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3794820189476013, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4454668164253235, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.28796843588352206, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03794820159673691, "step": 30 }, { "calibration/aurc": 0.24226920367701021, "calibration/batch_distribution_entropy": 0.9581315958770873, "calibration/buffer_distribution_entropy": 0.7127915712755234, "calibration/confidence_entropy": 0.4551993687512953, "calibration/coverage@0%": 0.009962875385701033, "calibration/coverage@1%": 0.009962875385701033, "calibration/coverage@10%": 0.04031613014830156, "calibration/coverage@15%": 0.19086098051820694, "calibration/coverage@20%": 0.3238800048827573, "calibration/coverage@25%": 0.6257067655588412, "calibration/coverage@30%": 0.8198433420365536, "calibration/coverage@5%": 0.01675138713504829, "calibration/ece": 0.2122458842995219, "calibration/mean_confidence": 0.5957768150519883, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018055555555555557, "completions/max_length": 3879.2, "completions/max_terminated_length": 3879.2, "completions/mean_length": 649.6018310546875, "completions/mean_terminated_length": 661.6557495117188, "completions/min_length": 0.0, "completions/min_terminated_length": 182.6, "epoch": 0.08399895001312484, "grad_norm": 0.002304090652614832, "learning_rate": 4.166666666666667e-06, "loss": -0.0483, "num_tokens": 63332142.0, "reward": 0.9694664359092713, "reward_std": 0.17354933619499208, "rewards/accuracy_reward": 0.6381944417953491, "rewards/batch_coverage_0": 0.15102842450141907, "rewards/batch_coverage_1": 0.15102842450141907, "rewards/batch_coverage_10": 0.2521698772907257, "rewards/batch_coverage_15": 0.2756618678569794, "rewards/batch_coverage_20": 0.2928420454263687, "rewards/batch_coverage_25": 0.3004423320293427, "rewards/batch_coverage_5": 0.20541033744812012, "rewards/brier_reward": 0.7120538473129272, "rewards/confidence_uniqueness_reward": 0.9275844931602478, "rewards/format_reward": 0.9814236044883728, "rewards/frontier_entropy_batch_reward": -0.27595201730728147, "signal/accuracy_reward/centered_abs_mean": 0.19836153984069824, "signal/accuracy_reward/group_std_mean": 0.25724024176597593, "signal/accuracy_reward/group_zero_std_frac": 0.2888888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8888693213462829, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09918076992034912, "signal/advantage_abs_mean": 0.7374646306037903, "signal/advantage_pre_scale_abs_mean": 0.12931651920080184, "signal/advantage_pre_scale_std": 0.1932118058204651, "signal/advantage_std": 0.983579683303833, "signal/batch_coverage_0/centered_abs_mean": 0.22375875115394592, "signal/batch_coverage_0/group_std_mean": 0.28625078201293946, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02871074862778187, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.00319975009188056, "signal/batch_coverage_1/centered_abs_mean": 0.22375875115394592, "signal/batch_coverage_1/group_std_mean": 0.28625078201293946, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02871074862778187, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.00319975009188056, "signal/batch_coverage_10/centered_abs_mean": 0.24917038083076476, "signal/batch_coverage_10/group_std_mean": 0.3143121063709259, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03190204724669456, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.003563136560842395, "signal/batch_coverage_15/centered_abs_mean": 0.25848992466926574, "signal/batch_coverage_15/group_std_mean": 0.32483440041542055, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.033208982273936274, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00369640588760376, "signal/batch_coverage_20/centered_abs_mean": 0.2676124572753906, "signal/batch_coverage_20/group_std_mean": 0.3357407510280609, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03440100736916065, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.00382685805670917, "signal/batch_coverage_25/centered_abs_mean": 0.2681353986263275, "signal/batch_coverage_25/group_std_mean": 0.3352727711200714, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03452103175222874, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003834336344152689, "signal/batch_coverage_5/centered_abs_mean": 0.23883576095104217, "signal/batch_coverage_5/group_std_mean": 0.3027981579303741, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03062332086265087, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0034153513144701718, "signal/brier_reward/centered_abs_mean": 0.23339413106441498, "signal/brier_reward/group_std_mean": 0.2831204056739807, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20949692130088807, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.023339413478970526, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04882469177246094, "signal/confidence_uniqueness_reward/group_std_mean": 0.07911910861730576, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04447423368692398, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004882469354197383, "signal/format_reward/centered_abs_mean": 0.03072916641831398, "signal/format_reward/group_std_mean": 0.05717795491218567, "signal/format_reward/group_zero_std_frac": 0.7638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1347813993692398, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01536458320915699, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3622328042984009, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43260804414749143, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3302638798952103, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036223283410072325, "step": 35 }, { "calibration/aurc": 0.2562137876806586, "calibration/batch_distribution_entropy": 0.938866150096856, "calibration/buffer_distribution_entropy": 0.7625007518494724, "calibration/confidence_entropy": 0.48723283405102336, "calibration/coverage@0%": 0.005274540682414698, "calibration/coverage@1%": 0.005274540682414698, "calibration/coverage@10%": 0.026074540682414698, "calibration/coverage@15%": 0.0667716535433071, "calibration/coverage@20%": 0.3302887139107612, "calibration/coverage@25%": 0.5345121572569387, "calibration/coverage@30%": 0.690819009325962, "calibration/coverage@5%": 0.005274540682414698, "calibration/ece": 0.17700430434619707, "calibration/mean_confidence": 0.615831670720631, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01519097222222221, "completions/max_length": 3865.4, "completions/max_terminated_length": 3865.4, "completions/mean_length": 663.9608642578125, "completions/mean_terminated_length": 674.2513427734375, "completions/min_length": 0.0, "completions/min_terminated_length": 179.6, "epoch": 0.09599880001499982, "grad_norm": 0.0024075538385659456, "learning_rate": 4.761904761904762e-06, "loss": -0.0471, "num_tokens": 74100491.0, "reward": 0.9821699857711792, "reward_std": 0.16510820984840394, "rewards/accuracy_reward": 0.6545138955116272, "rewards/batch_coverage_0": 0.18086645603179932, "rewards/batch_coverage_1": 0.18086645603179932, "rewards/batch_coverage_10": 0.2809440016746521, "rewards/batch_coverage_15": 0.29933875799179077, "rewards/batch_coverage_20": 0.3136002361774445, "rewards/batch_coverage_25": 0.3249046623706818, "rewards/batch_coverage_5": 0.23657204806804658, "rewards/brier_reward": 0.7423569560050964, "rewards/confidence_uniqueness_reward": 0.9304731726646424, "rewards/format_reward": 0.9846354126930237, "rewards/frontier_entropy_batch_reward": -0.30672123432159426, "signal/accuracy_reward/centered_abs_mean": 0.1842881917953491, "signal/accuracy_reward/group_std_mean": 0.24733619391918182, "signal/accuracy_reward/group_zero_std_frac": 0.28888889849185945, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8925943851470948, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09214409589767455, "signal/advantage_abs_mean": 0.7237564325332642, "signal/advantage_pre_scale_abs_mean": 0.12071978598833084, "signal/advantage_pre_scale_std": 0.18614649176597595, "signal/advantage_std": 0.9835057854652405, "signal/batch_coverage_0/centered_abs_mean": 0.19749594628810882, "signal/batch_coverage_0/group_std_mean": 0.2580998420715332, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.027659310027956963, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0028241920750588178, "signal/batch_coverage_1/centered_abs_mean": 0.19749594628810882, "signal/batch_coverage_1/group_std_mean": 0.2580998420715332, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.027659310027956963, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0028241920750588178, "signal/batch_coverage_10/centered_abs_mean": 0.2273343801498413, "signal/batch_coverage_10/group_std_mean": 0.29257852435112, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03175325132906437, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.003250881750136614, "signal/batch_coverage_15/centered_abs_mean": 0.2346246600151062, "signal/batch_coverage_15/group_std_mean": 0.3009405076503754, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03282949589192867, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.003355132741853595, "signal/batch_coverage_20/centered_abs_mean": 0.24077640175819398, "signal/batch_coverage_20/group_std_mean": 0.307919579744339, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.033719856292009354, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003443102678284049, "signal/batch_coverage_25/centered_abs_mean": 0.24552258849143982, "signal/batch_coverage_25/group_std_mean": 0.31313495635986327, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.034370492398738864, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0035109729506075382, "signal/batch_coverage_5/centered_abs_mean": 0.2139654278755188, "signal/batch_coverage_5/group_std_mean": 0.2763186156749725, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02990221492946148, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00305970567278564, "signal/brier_reward/centered_abs_mean": 0.2017700344324112, "signal/brier_reward/group_std_mean": 0.25099734365940096, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1979275941848755, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020177004113793372, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04290001392364502, "signal/confidence_uniqueness_reward/group_std_mean": 0.06807700842618943, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04234274849295616, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004290001420304179, "signal/format_reward/centered_abs_mean": 0.02630750872194767, "signal/format_reward/group_std_mean": 0.04854954332113266, "signal/format_reward/group_zero_std_frac": 0.8027778029441833, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.12644327580928802, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.013153754360973834, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.37426209449768066, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4400992035865784, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.36859245896339415, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.037426209449768065, "step": 40 }, { "calibration/aurc": 0.18634576408880263, "calibration/batch_distribution_entropy": 0.9119223904372665, "calibration/buffer_distribution_entropy": 0.802208760829016, "calibration/confidence_entropy": 0.4703543439728567, "calibration/coverage@0%": 0.010027763944112675, "calibration/coverage@1%": 0.010027763944112675, "calibration/coverage@10%": 0.08765064263197066, "calibration/coverage@15%": 0.27478578523672303, "calibration/coverage@20%": 0.7562270949253943, "calibration/coverage@25%": 0.9463833675094918, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.028304526346201446, "calibration/ece": 0.13231561302728326, "calibration/mean_confidence": 0.6588306208595136, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019357638888888907, "completions/max_length": 3533.2, "completions/max_terminated_length": 3533.2, "completions/mean_length": 666.63681640625, "completions/mean_terminated_length": 679.785791015625, "completions/min_length": 0.0, "completions/min_terminated_length": 180.4, "epoch": 0.1079986500168748, "grad_norm": 0.0027716143522411585, "learning_rate": 4.909638554216868e-06, "loss": -0.0528, "num_tokens": 84915411.0, "reward": 0.9768375635147095, "reward_std": 0.16755106151103974, "rewards/accuracy_reward": 0.6585069417953491, "rewards/batch_coverage_0": 0.20247699320316315, "rewards/batch_coverage_1": 0.20247699320316315, "rewards/batch_coverage_10": 0.30868937373161315, "rewards/batch_coverage_15": 0.3283854365348816, "rewards/batch_coverage_20": 0.34293708205223083, "rewards/batch_coverage_25": 0.3498722016811371, "rewards/batch_coverage_5": 0.2585438132286072, "rewards/brier_reward": 0.7502263784408569, "rewards/confidence_uniqueness_reward": 0.9214388489723205, "rewards/format_reward": 0.9802951455116272, "rewards/frontier_entropy_batch_reward": -0.382354199886322, "signal/accuracy_reward/centered_abs_mean": 0.18536241352558136, "signal/accuracy_reward/group_std_mean": 0.24444203674793244, "signal/accuracy_reward/group_zero_std_frac": 0.30555556416511537, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9499078631401062, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09268120676279068, "signal/advantage_abs_mean": 0.7305899024009704, "signal/advantage_pre_scale_abs_mean": 0.12448228895664215, "signal/advantage_pre_scale_std": 0.19205167889595032, "signal/advantage_std": 0.9834497570991516, "signal/batch_coverage_0/centered_abs_mean": 0.18731668293476106, "signal/batch_coverage_0/group_std_mean": 0.25060613453388214, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.027264073491096497, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0026786285918205975, "signal/batch_coverage_1/centered_abs_mean": 0.18731668293476106, "signal/batch_coverage_1/group_std_mean": 0.25060613453388214, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.027264073491096497, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0026786285918205975, "signal/batch_coverage_10/centered_abs_mean": 0.22031475007534027, "signal/batch_coverage_10/group_std_mean": 0.291497939825058, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03219587206840515, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0031505009159445764, "signal/batch_coverage_15/centered_abs_mean": 0.22723614275455475, "signal/batch_coverage_15/group_std_mean": 0.29923227429389954, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.033339572697877885, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0032494768034666776, "signal/batch_coverage_20/centered_abs_mean": 0.23478021323680878, "signal/batch_coverage_20/group_std_mean": 0.30784116983413695, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.0344635047018528, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0033573569264262913, "signal/batch_coverage_25/centered_abs_mean": 0.24096759259700776, "signal/batch_coverage_25/group_std_mean": 0.3143567681312561, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03543527238070965, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0034458366688340904, "signal/batch_coverage_5/centered_abs_mean": 0.2049874782562256, "signal/batch_coverage_5/group_std_mean": 0.2722942590713501, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.029881338775157928, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0029313208535313605, "signal/brier_reward/centered_abs_mean": 0.19711728096008302, "signal/brier_reward/group_std_mean": 0.24602427780628205, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20185837149620056, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.019711730256676673, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04846069440245628, "signal/confidence_uniqueness_reward/group_std_mean": 0.07283977195620536, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05067523345351219, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004846069682389498, "signal/format_reward/centered_abs_mean": 0.03095160573720932, "signal/format_reward/group_std_mean": 0.052476833760738376, "signal/format_reward/group_zero_std_frac": 0.8027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.16123647689819337, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01547580286860466, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.39955584406852723, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4637088716030121, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41296189427375796, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.039955586194992065, "step": 45 }, { "calibration/aurc": 0.3653812543339875, "calibration/batch_distribution_entropy": 0.9548606722830435, "calibration/buffer_distribution_entropy": 0.8285173590046464, "calibration/confidence_entropy": 0.4608581055976243, "calibration/coverage@0%": 0.006972364490113031, "calibration/coverage@1%": 0.006972364490113031, "calibration/coverage@10%": 0.006972364490113031, "calibration/coverage@15%": 0.006972364490113031, "calibration/coverage@20%": 0.06575440902969278, "calibration/coverage@25%": 0.2739157656296435, "calibration/coverage@30%": 0.3610937358143598, "calibration/coverage@5%": 0.006972364490113031, "calibration/ece": 0.19754022596238457, "calibration/mean_confidence": 0.5781760003123844, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.020138888888888883, "completions/max_length": 3450.4, "completions/max_terminated_length": 3450.4, "completions/mean_length": 702.4233520507812, "completions/mean_terminated_length": 716.8848022460937, "completions/min_length": 0.0, "completions/min_terminated_length": 167.4, "epoch": 0.11999850001874976, "grad_norm": 0.0027710706926882267, "learning_rate": 4.759036144578314e-06, "loss": -0.0563, "num_tokens": 96104928.0, "reward": 0.9751698017120362, "reward_std": 0.1612878292798996, "rewards/accuracy_reward": 0.6449652671813965, "rewards/batch_coverage_0": 0.22631794810295106, "rewards/batch_coverage_1": 0.22631794810295106, "rewards/batch_coverage_10": 0.3063993453979492, "rewards/batch_coverage_15": 0.32116289138793946, "rewards/batch_coverage_20": 0.335969477891922, "rewards/batch_coverage_25": 0.3402317225933075, "rewards/batch_coverage_5": 0.2677540272474289, "rewards/brier_reward": 0.7310540556907654, "rewards/confidence_uniqueness_reward": 0.9254271507263183, "rewards/format_reward": 0.9791666626930237, "rewards/frontier_entropy_batch_reward": -0.31489698886871337, "signal/accuracy_reward/centered_abs_mean": 0.1730685740709305, "signal/accuracy_reward/group_std_mean": 0.22485636174678802, "signal/accuracy_reward/group_zero_std_frac": 0.37500000596046446, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9425517797470093, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08653428703546524, "signal/advantage_abs_mean": 0.7427342653274536, "signal/advantage_pre_scale_abs_mean": 0.12028573453426361, "signal/advantage_pre_scale_std": 0.18794769048690796, "signal/advantage_std": 0.9833948612213135, "signal/batch_coverage_0/centered_abs_mean": 0.20805572867393493, "signal/batch_coverage_0/group_std_mean": 0.2680402934551239, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.032111577689647675, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0029751969035714866, "signal/batch_coverage_1/centered_abs_mean": 0.20805572867393493, "signal/batch_coverage_1/group_std_mean": 0.2680402934551239, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.032111577689647675, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0029751969035714866, "signal/batch_coverage_10/centered_abs_mean": 0.23025363981723784, "signal/batch_coverage_10/group_std_mean": 0.29348798990249636, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.035579219460487366, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.003292627027258277, "signal/batch_coverage_15/centered_abs_mean": 0.23427854776382445, "signal/batch_coverage_15/group_std_mean": 0.29840022325515747, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03621581345796585, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.003350183181464672, "signal/batch_coverage_20/centered_abs_mean": 0.24115452468395232, "signal/batch_coverage_20/group_std_mean": 0.3071499466896057, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03732352778315544, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0034485096111893654, "signal/batch_coverage_25/centered_abs_mean": 0.24156874120235444, "signal/batch_coverage_25/group_std_mean": 0.30771451592445376, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03742283582687378, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0034544329158961774, "signal/batch_coverage_5/centered_abs_mean": 0.2209177404642105, "signal/batch_coverage_5/group_std_mean": 0.2820799022912979, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.034112075716257094, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0031591235660016538, "signal/brier_reward/centered_abs_mean": 0.20708931982517242, "signal/brier_reward/group_std_mean": 0.2561024874448776, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.22439938485622407, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.020708932355046273, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04774502441287041, "signal/confidence_uniqueness_reward/group_std_mean": 0.07586067467927933, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05263256207108498, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004774502851068974, "signal/format_reward/centered_abs_mean": 0.03421224020421505, "signal/format_reward/group_std_mean": 0.060324309021234514, "signal/format_reward/group_zero_std_frac": 0.7666666626930236, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18888653069734573, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.017106120102107526, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3588499128818512, "signal/frontier_entropy_batch_reward/group_std_mean": 0.42804943919181826, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3920546770095825, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03588499128818512, "step": 50 }, { "epoch": 0.11999850001874976, "eval_calibration/aurc": 0.2501744326434537, "eval_calibration/batch_distribution_entropy": 0.8360048238344385, "eval_calibration/buffer_distribution_entropy": 0.8410969664986397, "eval_calibration/confidence_entropy": 0.5071609062611472, "eval_calibration/coverage@0%": 0.1490255376344086, "eval_calibration/coverage@1%": 0.1490255376344086, "eval_calibration/coverage@10%": 0.20732526881720428, "eval_calibration/coverage@15%": 0.36710349462365593, "eval_calibration/coverage@20%": 0.44690860215053757, "eval_calibration/coverage@25%": 0.5913978494623656, "eval_calibration/coverage@30%": 0.7911626344086021, "eval_calibration/coverage@5%": 0.1490255376344086, "eval_calibration/ece": 0.2197660028658773, "eval_calibration/mean_confidence": 0.6813664740767263, "eval_completions/clipped_ratio": 0.02760416666666669, "eval_completions/max_length": 2184.3333333333335, "eval_completions/max_terminated_length": 2184.3333333333335, "eval_completions/mean_length": 685.631103515625, "eval_completions/mean_terminated_length": 704.9729817708334, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 257.1666666666667, "eval_loss": 0.0, "eval_num_tokens": 96104928.0, "eval_reward": 0.8819248378276825, "eval_reward_std": 0.281004066268603, "eval_rewards/accuracy_reward": 0.6545138855775198, "eval_rewards/batch_coverage_0": 0.005231775964299838, "eval_rewards/batch_coverage_1": 0.005231775964299838, "eval_rewards/batch_coverage_10": 0.015199587369958559, "eval_rewards/batch_coverage_15": 0.03218171435097853, "eval_rewards/batch_coverage_20": 0.08130176406727212, "eval_rewards/batch_coverage_25": 0.11032332324733336, "eval_rewards/batch_coverage_5": 0.005231775964299838, "eval_rewards/brier_reward": 0.7671490510304769, "eval_rewards/confidence_uniqueness_reward": 0.8576907813549042, "eval_rewards/format_reward": 0.9713541766007742, "eval_rewards/frontier_entropy_batch_reward": -0.9713541766007742, "eval_runtime": 212.1586, "eval_samples_per_second": 4.713, "eval_signal/accuracy_reward/centered_abs_mean": 0.4370659738779068, "eval_signal/accuracy_reward/group_std_mean": 0.473753089706103, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7891835272312164, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2185329869389534, "eval_signal/advantage_abs_mean": 0.8707231283187866, "eval_signal/advantage_pre_scale_abs_mean": 0.24482331921656927, "eval_signal/advantage_pre_scale_std": 0.27929239471753436, "eval_signal/advantage_std": 0.9864674607912699, "eval_signal/batch_coverage_0/centered_abs_mean": 0.11891523251930873, "eval_signal/batch_coverage_0/group_std_mean": 0.18859038750330606, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.00614167214371264, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.0017004877833339076, "eval_signal/batch_coverage_1/centered_abs_mean": 0.11891523251930873, "eval_signal/batch_coverage_1/group_std_mean": 0.18859038750330606, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.00614167214371264, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.0017004877833339076, "eval_signal/batch_coverage_10/centered_abs_mean": 0.10200798759857814, "eval_signal/batch_coverage_10/group_std_mean": 0.15644440179069838, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.005255104896302025, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.0014587142504751682, "eval_signal/batch_coverage_15/centered_abs_mean": 0.1067725345492363, "eval_signal/batch_coverage_15/group_std_mean": 0.15877163410186768, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.005520270516475041, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.001526847171286742, "eval_signal/batch_coverage_20/centered_abs_mean": 0.1338308664659659, "eval_signal/batch_coverage_20/group_std_mean": 0.18896765261888504, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.006970916486655672, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.00191378133604303, "eval_signal/batch_coverage_25/centered_abs_mean": 0.15927699456612268, "eval_signal/batch_coverage_25/group_std_mean": 0.2141042004028956, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.008288138701270023, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.002277661037320892, "eval_signal/batch_coverage_5/centered_abs_mean": 0.11891523251930873, "eval_signal/batch_coverage_5/group_std_mean": 0.18859038750330606, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.00614167214371264, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.0017004877833339076, "eval_signal/brier_reward/centered_abs_mean": 0.2128459538022677, "eval_signal/brier_reward/group_std_mean": 0.27014947930971783, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07686047628521919, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.02128459544231494, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07862282978991668, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.14259813353419304, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.028277449930707615, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00786228283929328, "eval_signal/format_reward/centered_abs_mean": 0.054307724349200726, "eval_signal/format_reward/group_std_mean": 0.1340879499912262, "eval_signal/format_reward/group_zero_std_frac": 0.3333333432674408, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.09734798781573772, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.027153862174600363, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.054307724349200726, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.1340879499912262, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.3333333432674408, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019469597997764748, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005430772861776252, "eval_steps_per_second": 0.028, "step": 50 }, { "calibration/aurc": 0.2666057163044536, "calibration/batch_distribution_entropy": 0.9119153030575751, "calibration/buffer_distribution_entropy": 0.8477103549632282, "calibration/confidence_entropy": 0.5351193885844949, "calibration/coverage@0%": 0.008132351368401148, "calibration/coverage@1%": 0.008132351368401148, "calibration/coverage@10%": 0.08364484499410232, "calibration/coverage@15%": 0.17373565065897462, "calibration/coverage@20%": 0.303553484435861, "calibration/coverage@25%": 0.5082058493847523, "calibration/coverage@30%": 0.5936117061911805, "calibration/coverage@5%": 0.008132351368401148, "calibration/ece": 0.13232012055932668, "calibration/mean_confidence": 0.6377276248857202, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.030729166666666696, "completions/max_length": 3737.2, "completions/max_terminated_length": 3737.2, "completions/mean_length": 740.2117309570312, "completions/mean_terminated_length": 764.018017578125, "completions/min_length": 0.0, "completions/min_terminated_length": 132.4, "epoch": 0.13199835002062474, "grad_norm": 0.002537653548642993, "learning_rate": 4.60843373493976e-06, "loss": -0.0787, "num_tokens": 107712743.0, "reward": 0.9625433802604675, "reward_std": 0.1768051266670227, "rewards/accuracy_reward": 0.6413194537162781, "rewards/batch_coverage_0": 0.19119429290294648, "rewards/batch_coverage_1": 0.19119429290294648, "rewards/batch_coverage_10": 0.27686830759048464, "rewards/batch_coverage_15": 0.29585487246513364, "rewards/batch_coverage_20": 0.3131400167942047, "rewards/batch_coverage_25": 0.31867790818214414, "rewards/batch_coverage_5": 0.23185440599918367, "rewards/brier_reward": 0.7527095675468445, "rewards/confidence_uniqueness_reward": 0.9140000581741333, "rewards/format_reward": 0.9682291626930237, "rewards/frontier_entropy_batch_reward": -0.3491052746772766, "signal/accuracy_reward/centered_abs_mean": 0.17724609076976777, "signal/accuracy_reward/group_std_mean": 0.23526398837566376, "signal/accuracy_reward/group_zero_std_frac": 0.33333333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8493547916412354, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08862304538488389, "signal/advantage_abs_mean": 0.7360433101654053, "signal/advantage_pre_scale_abs_mean": 0.13109518587589264, "signal/advantage_pre_scale_std": 0.2053077608346939, "signal/advantage_std": 0.9835181951522827, "signal/batch_coverage_0/centered_abs_mean": 0.15323663353919983, "signal/batch_coverage_0/group_std_mean": 0.19757155179977418, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.020985569804906845, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.00219128392636776, "signal/batch_coverage_1/centered_abs_mean": 0.15323663353919983, "signal/batch_coverage_1/group_std_mean": 0.19757155179977418, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.020985569804906845, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.00219128392636776, "signal/batch_coverage_10/centered_abs_mean": 0.18423721194267273, "signal/batch_coverage_10/group_std_mean": 0.23655245900154115, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.025256452709436418, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0026345920283347367, "signal/batch_coverage_15/centered_abs_mean": 0.19146005511283876, "signal/batch_coverage_15/group_std_mean": 0.24579738676548005, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.026244521141052246, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00273787877522409, "signal/batch_coverage_20/centered_abs_mean": 0.20497694611549377, "signal/batch_coverage_20/group_std_mean": 0.2633979916572571, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.028137539327144623, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002931170305237174, "signal/batch_coverage_25/centered_abs_mean": 0.20725022852420807, "signal/batch_coverage_25/group_std_mean": 0.266075587272644, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.028393303230404853, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0029636782128363848, "signal/batch_coverage_5/centered_abs_mean": 0.16661872565746308, "signal/batch_coverage_5/group_std_mean": 0.21373461782932282, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.022796943411231042, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023826478514820336, "signal/brier_reward/centered_abs_mean": 0.16861002445220946, "signal/brier_reward/group_std_mean": 0.21469865143299102, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16197701394557953, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.016861002892255783, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05775104984641075, "signal/confidence_uniqueness_reward/group_std_mean": 0.09017271548509598, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.054496601969003675, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005775105208158493, "signal/format_reward/centered_abs_mean": 0.04748263955116272, "signal/format_reward/group_std_mean": 0.07838348224759102, "signal/format_reward/group_zero_std_frac": 0.7166666746139526, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.22152430117130278, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02374131977558136, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3691326975822449, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4334746658802032, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.35583109855651857, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.036913270503282546, "step": 55 }, { "calibration/aurc": 0.2805250612184601, "calibration/batch_distribution_entropy": 0.9082584323337837, "calibration/buffer_distribution_entropy": 0.8582812627318781, "calibration/confidence_entropy": 0.46065529544521266, "calibration/coverage@0%": 0.012173174872665535, "calibration/coverage@1%": 0.012173174872665535, "calibration/coverage@10%": 0.12241878890775326, "calibration/coverage@15%": 0.3207108092812677, "calibration/coverage@20%": 0.45481607243916244, "calibration/coverage@25%": 0.5905705952225476, "calibration/coverage@30%": 0.6680951759050231, "calibration/coverage@5%": 0.01638370118845501, "calibration/ece": 0.15464922341745774, "calibration/mean_confidence": 0.6475694502184807, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04088541666666665, "completions/max_length": 3870.8, "completions/max_terminated_length": 3870.8, "completions/mean_length": 796.7504272460938, "completions/mean_terminated_length": 831.1199584960938, "completions/min_length": 0.0, "completions/min_terminated_length": 160.0, "epoch": 0.14399820002249972, "grad_norm": 0.002131366403773427, "learning_rate": 4.457831325301205e-06, "loss": -0.1066, "num_tokens": 119987884.0, "reward": 0.9501850128173828, "reward_std": 0.19018920361995698, "rewards/accuracy_reward": 0.6277777791023255, "rewards/batch_coverage_0": 0.22274880409240722, "rewards/batch_coverage_1": 0.22274880409240722, "rewards/batch_coverage_10": 0.3174858093261719, "rewards/batch_coverage_15": 0.3349512219429016, "rewards/batch_coverage_20": 0.3502999782562256, "rewards/batch_coverage_25": 0.3587311267852783, "rewards/batch_coverage_5": 0.273887038230896, "rewards/brier_reward": 0.7444848537445068, "rewards/confidence_uniqueness_reward": 0.9000389218330384, "rewards/format_reward": 0.9586805701255798, "rewards/frontier_entropy_batch_reward": -0.3725272506475449, "signal/accuracy_reward/centered_abs_mean": 0.18524305522441864, "signal/accuracy_reward/group_std_mean": 0.24164635241031646, "signal/accuracy_reward/group_zero_std_frac": 0.32500000298023224, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9821744084358215, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.09262152761220932, "signal/advantage_abs_mean": 0.720962119102478, "signal/advantage_pre_scale_abs_mean": 0.1397200971841812, "signal/advantage_pre_scale_std": 0.22300408482551576, "signal/advantage_std": 0.9834036588668823, "signal/batch_coverage_0/centered_abs_mean": 0.17097190916538238, "signal/batch_coverage_0/group_std_mean": 0.2232629120349884, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02595260217785835, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0024448982439935207, "signal/batch_coverage_1/centered_abs_mean": 0.17097190916538238, "signal/batch_coverage_1/group_std_mean": 0.2232629120349884, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02595260217785835, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0024448982439935207, "signal/batch_coverage_10/centered_abs_mean": 0.20441849529743195, "signal/batch_coverage_10/group_std_mean": 0.2626152366399765, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.031098613888025282, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002923184493556619, "signal/batch_coverage_15/centered_abs_mean": 0.21226610839366913, "signal/batch_coverage_15/group_std_mean": 0.2717501163482666, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03228338472545147, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0030354054179042578, "signal/batch_coverage_20/centered_abs_mean": 0.21970059275627135, "signal/batch_coverage_20/group_std_mean": 0.2806521713733673, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.033462189882993695, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003141718450933695, "signal/batch_coverage_25/centered_abs_mean": 0.22630649209022521, "signal/batch_coverage_25/group_std_mean": 0.2887511670589447, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03441586978733539, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003236182779073715, "signal/batch_coverage_5/centered_abs_mean": 0.18746646940708162, "signal/batch_coverage_5/group_std_mean": 0.24161474406719208, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02843271866440773, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0026807705871760845, "signal/brier_reward/centered_abs_mean": 0.1914803385734558, "signal/brier_reward/group_std_mean": 0.23936934769153595, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.2029225081205368, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01914803497493267, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.07778193056583405, "signal/confidence_uniqueness_reward/group_std_mean": 0.12012100070714951, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.08290282860398293, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007778193149715662, "signal/format_reward/centered_abs_mean": 0.0645724818110466, "signal/format_reward/group_std_mean": 0.10592493116855621, "signal/format_reward/group_zero_std_frac": 0.6166666626930237, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.33684843182563784, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0322862409055233, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3714078009128571, "signal/frontier_entropy_batch_reward/group_std_mean": 0.43968183994293214, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.40729886293411255, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03714078143239021, "step": 60 }, { "calibration/aurc": 0.21271399086090503, "calibration/batch_distribution_entropy": 0.968225044162411, "calibration/buffer_distribution_entropy": 0.873241567117009, "calibration/confidence_entropy": 0.46644094116885915, "calibration/coverage@0%": 0.015098270514390218, "calibration/coverage@1%": 0.015098270514390218, "calibration/coverage@10%": 0.2748608190038695, "calibration/coverage@15%": 0.5670735095002177, "calibration/coverage@20%": 0.6353687146386167, "calibration/coverage@25%": 0.7044765332732366, "calibration/coverage@30%": 0.752890016419304, "calibration/coverage@5%": 0.09382167476970939, "calibration/ece": 0.16446162124530828, "calibration/mean_confidence": 0.5560946029883173, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05295138888888891, "completions/max_length": 3882.2, "completions/max_terminated_length": 3882.2, "completions/mean_length": 815.523095703125, "completions/mean_terminated_length": 860.9260864257812, "completions/min_length": 0.0, "completions/min_terminated_length": 186.2, "epoch": 0.1559980500243747, "grad_norm": 0.0024373617488890886, "learning_rate": 4.307228915662651e-06, "loss": -0.1167, "num_tokens": 132476758.0, "reward": 0.9634868264198303, "reward_std": 0.1869693487882614, "rewards/accuracy_reward": 0.6431423544883728, "rewards/batch_coverage_0": 0.23204412460327148, "rewards/batch_coverage_1": 0.23204412460327148, "rewards/batch_coverage_10": 0.32365984916687013, "rewards/batch_coverage_15": 0.3395088195800781, "rewards/batch_coverage_20": 0.35158597230911254, "rewards/batch_coverage_25": 0.35531420111656187, "rewards/batch_coverage_5": 0.28531578183174133, "rewards/brier_reward": 0.7312113642692566, "rewards/confidence_uniqueness_reward": 0.8997768282890319, "rewards/format_reward": 0.946875, "rewards/frontier_entropy_batch_reward": -0.24929134249687196, "signal/accuracy_reward/centered_abs_mean": 0.16935221254825591, "signal/accuracy_reward/group_std_mean": 0.22745457291603088, "signal/accuracy_reward/group_zero_std_frac": 0.33611111640930175, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8086487889289856, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08467610627412796, "signal/advantage_abs_mean": 0.7164683222770691, "signal/advantage_pre_scale_abs_mean": 0.13654183447360993, "signal/advantage_pre_scale_std": 0.21922028064727783, "signal/advantage_std": 0.9835149884223938, "signal/batch_coverage_0/centered_abs_mean": 0.20915255844593048, "signal/batch_coverage_0/group_std_mean": 0.2628923296928406, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.028789347037672997, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002990881586447358, "signal/batch_coverage_1/centered_abs_mean": 0.20915255844593048, "signal/batch_coverage_1/group_std_mean": 0.2628923296928406, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.028789347037672997, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002990881586447358, "signal/batch_coverage_10/centered_abs_mean": 0.23952043652534485, "signal/batch_coverage_10/group_std_mean": 0.2989070534706116, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0329642117023468, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0034251420758664606, "signal/batch_coverage_15/centered_abs_mean": 0.24125888645648957, "signal/batch_coverage_15/group_std_mean": 0.30165609121322634, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03321785070002079, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.003450001962482929, "signal/batch_coverage_20/centered_abs_mean": 0.2430189371109009, "signal/batch_coverage_20/group_std_mean": 0.30439882874488833, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03352968730032444, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003475170908495784, "signal/batch_coverage_25/centered_abs_mean": 0.24278749525547028, "signal/batch_coverage_25/group_std_mean": 0.3051342725753784, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03350915126502514, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003471861220896244, "signal/batch_coverage_5/centered_abs_mean": 0.22716282606124877, "signal/batch_coverage_5/group_std_mean": 0.28373380899429324, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03121436983346939, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.003248428227379918, "signal/brier_reward/centered_abs_mean": 0.21119480431079865, "signal/brier_reward/group_std_mean": 0.2605059534311295, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.20226334631443024, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.021119481325149535, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.08284200727939606, "signal/confidence_uniqueness_reward/group_std_mean": 0.12240892052650451, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07839244678616523, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.008284200355410576, "signal/format_reward/centered_abs_mean": 0.07634548544883728, "signal/format_reward/group_std_mean": 0.1152060568332672, "signal/format_reward/group_zero_std_frac": 0.6111111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.3589187800884247, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.03817274272441864, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32150029540061953, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39567927122116087, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3117268800735474, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03215003050863743, "step": 65 }, { "calibration/aurc": 0.24774531330265148, "calibration/batch_distribution_entropy": 0.9211471777051056, "calibration/buffer_distribution_entropy": 0.8814297626198936, "calibration/confidence_entropy": 0.4404385253711066, "calibration/coverage@0%": 0.01250161932596551, "calibration/coverage@1%": 0.01250161932596551, "calibration/coverage@10%": 0.13770487135848583, "calibration/coverage@15%": 0.15402542453616247, "calibration/coverage@20%": 0.28338366903393925, "calibration/coverage@25%": 0.5766635713041601, "calibration/coverage@30%": 0.8610831903159772, "calibration/coverage@5%": 0.08025229683274057, "calibration/ece": 0.14467948400989514, "calibration/mean_confidence": 0.6279437049412072, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03663194444444444, "completions/max_length": 3646.8, "completions/max_terminated_length": 3646.8, "completions/mean_length": 834.01416015625, "completions/mean_terminated_length": 865.7296020507813, "completions/min_length": 0.0, "completions/min_terminated_length": 208.0, "epoch": 0.16799790002624967, "grad_norm": 0.0021353804040700197, "learning_rate": 4.156626506024097e-06, "loss": -0.0899, "num_tokens": 145162745.0, "reward": 0.9690884709358215, "reward_std": 0.17554756104946137, "rewards/accuracy_reward": 0.6421006917953491, "rewards/batch_coverage_0": 0.26493396162986754, "rewards/batch_coverage_1": 0.26493396162986754, "rewards/batch_coverage_10": 0.35353497266769407, "rewards/batch_coverage_15": 0.36810287833213806, "rewards/batch_coverage_20": 0.3775683999061584, "rewards/batch_coverage_25": 0.38541075587272644, "rewards/batch_coverage_5": 0.3158003121614456, "rewards/brier_reward": 0.7648000240325927, "rewards/confidence_uniqueness_reward": 0.9073068022727966, "rewards/format_reward": 0.9630208253860474, "rewards/frontier_entropy_batch_reward": -0.3400608658790588, "signal/accuracy_reward/centered_abs_mean": 0.1756781667470932, "signal/accuracy_reward/group_std_mean": 0.22818847894668579, "signal/accuracy_reward/group_zero_std_frac": 0.3583333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9614694237709045, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0878390833735466, "signal/advantage_abs_mean": 0.7173182845115662, "signal/advantage_pre_scale_abs_mean": 0.12758750170469285, "signal/advantage_pre_scale_std": 0.2087983638048172, "signal/advantage_std": 0.9833806037902832, "signal/batch_coverage_0/centered_abs_mean": 0.18425578474998475, "signal/batch_coverage_0/group_std_mean": 0.23661755323410033, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02887213006615639, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002634857688099146, "signal/batch_coverage_1/centered_abs_mean": 0.18425578474998475, "signal/batch_coverage_1/group_std_mean": 0.23661755323410033, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02887213006615639, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002634857688099146, "signal/batch_coverage_10/centered_abs_mean": 0.20975691378116607, "signal/batch_coverage_10/group_std_mean": 0.2687097519636154, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0329612348228693, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0029995237942785024, "signal/batch_coverage_15/centered_abs_mean": 0.2140837788581848, "signal/batch_coverage_15/group_std_mean": 0.27389532923698423, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03371351547539234, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00306139811873436, "signal/batch_coverage_20/centered_abs_mean": 0.21710529327392578, "signal/batch_coverage_20/group_std_mean": 0.27775133550167086, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03412764519453049, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.003104605805128813, "signal/batch_coverage_25/centered_abs_mean": 0.2247564196586609, "signal/batch_coverage_25/group_std_mean": 0.2869294822216034, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03532760851085186, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0032140168827027083, "signal/batch_coverage_5/centered_abs_mean": 0.19823941290378572, "signal/batch_coverage_5/group_std_mean": 0.25322408974170685, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.031146563962101935, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002834823727607727, "signal/brier_reward/centered_abs_mean": 0.17921520471572877, "signal/brier_reward/group_std_mean": 0.2279728651046753, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19661661088466645, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01792152039706707, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06926488056778908, "signal/confidence_uniqueness_reward/group_std_mean": 0.10625757575035095, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.07679094523191451, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006926488224416971, "signal/format_reward/centered_abs_mean": 0.05738932266831398, "signal/format_reward/group_std_mean": 0.09339729994535446, "signal/format_reward/group_zero_std_frac": 0.6638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.316864949464798, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02869466133415699, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35653844475746155, "signal/frontier_entropy_batch_reward/group_std_mean": 0.426321929693222, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.39354650378227235, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.035653845965862276, "step": 70 }, { "calibration/aurc": 0.1860572061793848, "calibration/batch_distribution_entropy": 0.9246497199562427, "calibration/buffer_distribution_entropy": 0.8883004335264812, "calibration/confidence_entropy": 0.46967255991625756, "calibration/coverage@0%": 0.05187684511478731, "calibration/coverage@1%": 0.08415197738991957, "calibration/coverage@10%": 0.3801901627673189, "calibration/coverage@15%": 0.4447886261393833, "calibration/coverage@20%": 0.5134962946195415, "calibration/coverage@25%": 0.6749903386901988, "calibration/coverage@30%": 0.7981028883421204, "calibration/coverage@5%": 0.19222920355225212, "calibration/ece": 0.13536141780022604, "calibration/mean_confidence": 0.6490086093658063, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017534722222222254, "completions/max_length": 3834.0, "completions/max_terminated_length": 3834.0, "completions/mean_length": 795.0524291992188, "completions/mean_terminated_length": 809.184228515625, "completions/min_length": 0.0, "completions/min_terminated_length": 244.4, "epoch": 0.17999775002812465, "grad_norm": 0.002331721130758524, "learning_rate": 4.006024096385543e-06, "loss": -0.0434, "num_tokens": 157386645.0, "reward": 1.0091155409812926, "reward_std": 0.1520775854587555, "rewards/accuracy_reward": 0.690625011920929, "rewards/batch_coverage_0": 0.2843721121549606, "rewards/batch_coverage_1": 0.2843721121549606, "rewards/batch_coverage_10": 0.37667989134788515, "rewards/batch_coverage_15": 0.3934926211833954, "rewards/batch_coverage_20": 0.4086416959762573, "rewards/batch_coverage_25": 0.41325275897979735, "rewards/batch_coverage_5": 0.3411641776561737, "rewards/brier_reward": 0.8053735375404358, "rewards/confidence_uniqueness_reward": 0.9250438809394836, "rewards/format_reward": 0.9811631917953492, "rewards/frontier_entropy_batch_reward": -0.3559852600097656, "signal/accuracy_reward/centered_abs_mean": 0.16431206464767456, "signal/accuracy_reward/group_std_mean": 0.21887060701847078, "signal/accuracy_reward/group_zero_std_frac": 0.37222222089767454, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9913081049919128, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08215603232383728, "signal/advantage_abs_mean": 0.7215629577636719, "signal/advantage_pre_scale_abs_mean": 0.10871622711420059, "signal/advantage_pre_scale_std": 0.18051161766052246, "signal/advantage_std": 0.9832531571388244, "signal/batch_coverage_0/centered_abs_mean": 0.16481218934059144, "signal/batch_coverage_0/group_std_mean": 0.21140751242637634, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.028905288875102998, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002356814406812191, "signal/batch_coverage_1/centered_abs_mean": 0.16481218934059144, "signal/batch_coverage_1/group_std_mean": 0.21140751242637634, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.028905288875102998, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002356814406812191, "signal/batch_coverage_10/centered_abs_mean": 0.19273822903633117, "signal/batch_coverage_10/group_std_mean": 0.24738204181194307, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03368869572877884, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0027561566326767205, "signal/batch_coverage_15/centered_abs_mean": 0.2002232104539871, "signal/batch_coverage_15/group_std_mean": 0.257281693816185, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.0350172333419323, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0028631918597966433, "signal/batch_coverage_20/centered_abs_mean": 0.20743354558944702, "signal/batch_coverage_20/group_std_mean": 0.26647299230098725, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03612685203552246, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0029662997461855412, "signal/batch_coverage_25/centered_abs_mean": 0.21094645261764527, "signal/batch_coverage_25/group_std_mean": 0.2706577003002167, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0366845540702343, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.003016534401103854, "signal/batch_coverage_5/centered_abs_mean": 0.1806482791900635, "signal/batch_coverage_5/group_std_mean": 0.23169384598731996, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03167664371430874, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00258327042683959, "signal/brier_reward/centered_abs_mean": 0.14588031470775603, "signal/brier_reward/group_std_mean": 0.18990064263343812, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17611936926841737, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014588031731545926, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.044456960260868074, "signal/confidence_uniqueness_reward/group_std_mean": 0.07398699522018433, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05278183743357658, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004445695923641324, "signal/format_reward/centered_abs_mean": 0.03200412318110466, "signal/format_reward/group_std_mean": 0.05958279184997082, "signal/format_reward/group_zero_std_frac": 0.7555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18332911729812623, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01600206159055233, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34321765303611756, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4119159996509552, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.42253615260124205, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03432176485657692, "step": 75 }, { "calibration/aurc": 0.15278728997151786, "calibration/batch_distribution_entropy": 0.9201143720375663, "calibration/buffer_distribution_entropy": 0.8935675122889043, "calibration/confidence_entropy": 0.4637714482116243, "calibration/coverage@0%": 0.047810804511804375, "calibration/coverage@1%": 0.047810804511804375, "calibration/coverage@10%": 0.39640807567155434, "calibration/coverage@15%": 0.6296719106720096, "calibration/coverage@20%": 0.7230256765216778, "calibration/coverage@25%": 0.7885582527268078, "calibration/coverage@30%": 0.9187234042553192, "calibration/coverage@5%": 0.11938975188022542, "calibration/ece": 0.14607550692186017, "calibration/mean_confidence": 0.6258542466649399, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.012152777777777768, "completions/max_length": 3363.8, "completions/max_terminated_length": 3363.8, "completions/mean_length": 798.2267333984375, "completions/mean_terminated_length": 808.1140991210938, "completions/min_length": 0.0, "completions/min_terminated_length": 201.8, "epoch": 0.19199760002999963, "grad_norm": 0.002594166435301304, "learning_rate": 3.855421686746989e-06, "loss": -0.0318, "num_tokens": 169635497.0, "reward": 0.9984848141670227, "reward_std": 0.13936654329299927, "rewards/accuracy_reward": 0.6553819417953491, "rewards/batch_coverage_0": 0.2774128675460815, "rewards/batch_coverage_1": 0.2774128675460815, "rewards/batch_coverage_10": 0.3563702404499054, "rewards/batch_coverage_15": 0.36783955097198484, "rewards/batch_coverage_20": 0.37556777000427244, "rewards/batch_coverage_25": 0.38054537773132324, "rewards/batch_coverage_5": 0.3182225406169891, "rewards/brier_reward": 0.7844858169555664, "rewards/confidence_uniqueness_reward": 0.9357450246810913, "rewards/format_reward": 0.9878472208976745, "rewards/frontier_entropy_batch_reward": -0.2880606114864349, "signal/accuracy_reward/centered_abs_mean": 0.1596137136220932, "signal/accuracy_reward/group_std_mean": 0.21180415749549866, "signal/accuracy_reward/group_zero_std_frac": 0.397222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.999223780632019, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0798068568110466, "signal/advantage_abs_mean": 0.7337562799453735, "signal/advantage_pre_scale_abs_mean": 0.10226666629314422, "signal/advantage_pre_scale_std": 0.1641099452972412, "signal/advantage_std": 0.9832229495048523, "signal/batch_coverage_0/centered_abs_mean": 0.16477072536945342, "signal/batch_coverage_0/group_std_mean": 0.20871671438217163, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.029762042686343193, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002356221526861191, "signal/batch_coverage_1/centered_abs_mean": 0.16477072536945342, "signal/batch_coverage_1/group_std_mean": 0.20871671438217163, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.029762042686343193, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002356221526861191, "signal/batch_coverage_10/centered_abs_mean": 0.19158050715923308, "signal/batch_coverage_10/group_std_mean": 0.2435964047908783, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.034541353583335876, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002739601256325841, "signal/batch_coverage_15/centered_abs_mean": 0.19723239839076995, "signal/batch_coverage_15/group_std_mean": 0.2512341469526291, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.035590323805809024, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0028204232454299926, "signal/batch_coverage_20/centered_abs_mean": 0.19890411496162413, "signal/batch_coverage_20/group_std_mean": 0.2538034111261368, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03589809164404869, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0028443288523703814, "signal/batch_coverage_25/centered_abs_mean": 0.19924717247486115, "signal/batch_coverage_25/group_std_mean": 0.2548013597726822, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03595203086733818, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0028492347337305546, "signal/batch_coverage_5/centered_abs_mean": 0.1787838190793991, "signal/batch_coverage_5/group_std_mean": 0.22594253718852997, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03230985403060913, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002556608524173498, "signal/brier_reward/centered_abs_mean": 0.1500013828277588, "signal/brier_reward/group_std_mean": 0.1918517142534256, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18809423148632048, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.015000138618052005, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.03316798135638237, "signal/confidence_uniqueness_reward/group_std_mean": 0.053910914808511734, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.042143101990222934, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0033167982008308174, "signal/format_reward/centered_abs_mean": 0.020323350466787814, "signal/format_reward/group_std_mean": 0.0388909000903368, "signal/format_reward/group_zero_std_frac": 0.8361111164093018, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1274551644921303, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.010161675233393907, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3206637859344482, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39140368103981016, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4064549207687378, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032066379114985465, "step": 80 }, { "calibration/aurc": 0.18923391547567533, "calibration/batch_distribution_entropy": 0.9520473330985391, "calibration/buffer_distribution_entropy": 0.9007541310204402, "calibration/confidence_entropy": 0.454584405636062, "calibration/coverage@0%": 0.019984941894130602, "calibration/coverage@1%": 0.019984941894130602, "calibration/coverage@10%": 0.2399654196333001, "calibration/coverage@15%": 0.4899862593203633, "calibration/coverage@20%": 0.6405708575230216, "calibration/coverage@25%": 0.76561468810309, "calibration/coverage@30%": 0.8572303871781062, "calibration/coverage@5%": 0.0599849418941306, "calibration/ece": 0.1479991190866681, "calibration/mean_confidence": 0.5452007696982675, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0078125, "completions/max_length": 3134.6, "completions/max_terminated_length": 3134.6, "completions/mean_length": 805.275439453125, "completions/mean_terminated_length": 811.61259765625, "completions/min_length": 0.0, "completions/min_terminated_length": 290.2, "epoch": 0.2039974500318746, "grad_norm": 0.00243806722573936, "learning_rate": 3.7048192771084342e-06, "loss": -0.0182, "num_tokens": 181999470.0, "reward": 1.0227416276931762, "reward_std": 0.13126814365386963, "rewards/accuracy_reward": 0.693836796283722, "rewards/batch_coverage_0": 0.30402875542640684, "rewards/batch_coverage_1": 0.30402875542640684, "rewards/batch_coverage_10": 0.3916003882884979, "rewards/batch_coverage_15": 0.4001737177371979, "rewards/batch_coverage_20": 0.41051369309425356, "rewards/batch_coverage_25": 0.4160007297992706, "rewards/batch_coverage_5": 0.3573697626590729, "rewards/brier_reward": 0.8003751873970032, "rewards/confidence_uniqueness_reward": 0.9386812210083008, "rewards/format_reward": 0.9921006798744202, "rewards/frontier_entropy_batch_reward": -0.3107993364334106, "signal/accuracy_reward/centered_abs_mean": 0.15584309697151183, "signal/accuracy_reward/group_std_mean": 0.20595036149024964, "signal/accuracy_reward/group_zero_std_frac": 0.4111111104488373, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0481150150299072, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07792154848575591, "signal/advantage_abs_mean": 0.7339847207069397, "signal/advantage_pre_scale_abs_mean": 0.09553650766611099, "signal/advantage_pre_scale_std": 0.15589506924152374, "signal/advantage_std": 0.9831393837928772, "signal/batch_coverage_0/centered_abs_mean": 0.1720154494047165, "signal/batch_coverage_0/group_std_mean": 0.21767355501651764, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03309999741613865, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002459820918738842, "signal/batch_coverage_1/centered_abs_mean": 0.1720154494047165, "signal/batch_coverage_1/group_std_mean": 0.21767355501651764, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03309999741613865, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002459820918738842, "signal/batch_coverage_10/centered_abs_mean": 0.19574157893657684, "signal/batch_coverage_10/group_std_mean": 0.25071599781513215, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03761949762701988, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002799104666337371, "signal/batch_coverage_15/centered_abs_mean": 0.19882267713546753, "signal/batch_coverage_15/group_std_mean": 0.25518170595169065, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03819100335240364, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0028431642800569533, "signal/batch_coverage_20/centered_abs_mean": 0.199673655629158, "signal/batch_coverage_20/group_std_mean": 0.257070392370224, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03838499188423157, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002855333220213652, "signal/batch_coverage_25/centered_abs_mean": 0.20092089772224425, "signal/batch_coverage_25/group_std_mean": 0.2598212480545044, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03866703435778618, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0028731688857078553, "signal/batch_coverage_5/centered_abs_mean": 0.18711472749710084, "signal/batch_coverage_5/group_std_mean": 0.23794482946395873, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03597569689154625, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002675740560516715, "signal/brier_reward/centered_abs_mean": 0.14387299716472626, "signal/brier_reward/group_std_mean": 0.18577905595302582, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19340074360370635, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.014387300051748753, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02960009016096592, "signal/confidence_uniqueness_reward/group_std_mean": 0.049013516306877135, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03989330157637596, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002960009081289172, "signal/format_reward/centered_abs_mean": 0.014480251632630826, "signal/format_reward/group_std_mean": 0.030678948760032652, "signal/format_reward/group_zero_std_frac": 0.8638888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09725062102079392, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007240125816315413, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.338201642036438, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4092584550380707, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4551249384880066, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033820164203643796, "step": 85 }, { "calibration/aurc": 0.11841251098462335, "calibration/batch_distribution_entropy": 0.9437270099405835, "calibration/buffer_distribution_entropy": 0.9070971011642726, "calibration/confidence_entropy": 0.47455616824898056, "calibration/coverage@0%": 0.04532894736842106, "calibration/coverage@1%": 0.07501644736842104, "calibration/coverage@10%": 0.528530701754386, "calibration/coverage@15%": 0.7434020014581308, "calibration/coverage@20%": 0.8073761890709623, "calibration/coverage@25%": 0.8629222359163078, "calibration/coverage@30%": 0.9403157981530343, "calibration/coverage@5%": 0.43197916666666664, "calibration/ece": 0.14672207193323844, "calibration/mean_confidence": 0.6095787607729249, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009722222222222233, "completions/max_length": 3009.0, "completions/max_terminated_length": 3009.0, "completions/mean_length": 798.0718872070313, "completions/mean_terminated_length": 805.9511962890625, "completions/min_length": 0.0, "completions/min_terminated_length": 291.0, "epoch": 0.2159973000337496, "grad_norm": 0.0024022439029067755, "learning_rate": 3.5542168674698798e-06, "loss": -0.0292, "num_tokens": 194261930.0, "reward": 1.012345314025879, "reward_std": 0.1382793515920639, "rewards/accuracy_reward": 0.67734375, "rewards/batch_coverage_0": 0.29151966571807864, "rewards/batch_coverage_1": 0.29151966571807864, "rewards/batch_coverage_10": 0.37335585355758666, "rewards/batch_coverage_15": 0.38456701040267943, "rewards/batch_coverage_20": 0.3926276683807373, "rewards/batch_coverage_25": 0.3991153180599213, "rewards/batch_coverage_5": 0.3377767264842987, "rewards/brier_reward": 0.8084167242050171, "rewards/confidence_uniqueness_reward": 0.9369056940078735, "rewards/format_reward": 0.9902777791023254, "rewards/frontier_entropy_batch_reward": -0.31325617134571077, "signal/accuracy_reward/centered_abs_mean": 0.15619032382965087, "signal/accuracy_reward/group_std_mean": 0.20633386969566345, "signal/accuracy_reward/group_zero_std_frac": 0.4083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9890505075454712, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07809516191482543, "signal/advantage_abs_mean": 0.722357201576233, "signal/advantage_pre_scale_abs_mean": 0.09861443638801574, "signal/advantage_pre_scale_std": 0.16254321336746216, "signal/advantage_std": 0.9832135915756226, "signal/batch_coverage_0/centered_abs_mean": 0.15663588047027588, "signal/batch_coverage_0/group_std_mean": 0.19704360365867615, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.028397080302238465, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002239893050864339, "signal/batch_coverage_1/centered_abs_mean": 0.15663588047027588, "signal/batch_coverage_1/group_std_mean": 0.19704360365867615, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.028397080302238465, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002239893050864339, "signal/batch_coverage_10/centered_abs_mean": 0.18144900798797609, "signal/batch_coverage_10/group_std_mean": 0.23120782077312468, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03289467468857765, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0025947208516299723, "signal/batch_coverage_15/centered_abs_mean": 0.1861711025238037, "signal/batch_coverage_15/group_std_mean": 0.23703703582286834, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03376954942941666, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0026622468139976263, "signal/batch_coverage_20/centered_abs_mean": 0.1893145114183426, "signal/batch_coverage_20/group_std_mean": 0.24125303626060485, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.034355881810188296, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0027071974240243436, "signal/batch_coverage_25/centered_abs_mean": 0.1931730628013611, "signal/batch_coverage_25/group_std_mean": 0.24652757942676545, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.035058844834566116, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0027623747009783983, "signal/batch_coverage_5/centered_abs_mean": 0.16939508318901061, "signal/batch_coverage_5/group_std_mean": 0.21401880979537963, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03073953352868557, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002422349713742733, "signal/brier_reward/centered_abs_mean": 0.13135956376791, "signal/brier_reward/group_std_mean": 0.17388156652450562, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16675288379192352, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013135956414043904, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.030612779781222343, "signal/confidence_uniqueness_reward/group_std_mean": 0.056104399263858795, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.039064832404255866, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.003061278164386749, "signal/format_reward/centered_abs_mean": 0.01807725727558136, "signal/format_reward/group_std_mean": 0.04161466956138611, "signal/format_reward/group_zero_std_frac": 0.8027777910232544, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.115253297239542, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.00903862863779068, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32865068316459656, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3971480667591095, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.41861586570739745, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03286506943404675, "step": 90 }, { "calibration/aurc": 0.15553885133255266, "calibration/batch_distribution_entropy": 0.9405537018543049, "calibration/buffer_distribution_entropy": 0.9123583877756813, "calibration/confidence_entropy": 0.42898815958161557, "calibration/coverage@0%": 0.09475057832516563, "calibration/coverage@1%": 0.09956341254976456, "calibration/coverage@10%": 0.5655464917805556, "calibration/coverage@15%": 0.6369851395020707, "calibration/coverage@20%": 0.7021939915679865, "calibration/coverage@25%": 0.7455078374465705, "calibration/coverage@30%": 0.7778327401207218, "calibration/coverage@5%": 0.3528750501873545, "calibration/ece": 0.19682903494839277, "calibration/mean_confidence": 0.5376710652957325, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017274305555555557, "completions/max_length": 3323.4, "completions/max_terminated_length": 3323.4, "completions/mean_length": 806.6776245117187, "completions/mean_terminated_length": 820.83466796875, "completions/min_length": 0.0, "completions/min_terminated_length": 282.4, "epoch": 0.22799715003562457, "grad_norm": 0.002317988546565175, "learning_rate": 3.4036144578313257e-06, "loss": -0.0587, "num_tokens": 206646536.0, "reward": 1.0056054592132568, "reward_std": 0.15680947303771972, "rewards/accuracy_reward": 0.6745659708976746, "rewards/batch_coverage_0": 0.319716477394104, "rewards/batch_coverage_1": 0.319716477394104, "rewards/batch_coverage_10": 0.3897965490818024, "rewards/batch_coverage_15": 0.3973939657211304, "rewards/batch_coverage_20": 0.40343857407569883, "rewards/batch_coverage_25": 0.4100212812423706, "rewards/batch_coverage_5": 0.3576392471790314, "rewards/brier_reward": 0.7860231637954712, "rewards/confidence_uniqueness_reward": 0.929762089252472, "rewards/format_reward": 0.9826388835906983, "rewards/frontier_entropy_batch_reward": -0.3172291338443756, "signal/accuracy_reward/centered_abs_mean": 0.16064995527267456, "signal/accuracy_reward/group_std_mean": 0.21915687918663024, "signal/accuracy_reward/group_zero_std_frac": 0.34166666865348816, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9114587306976318, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.08032497763633728, "signal/advantage_abs_mean": 0.6914363980293274, "signal/advantage_pre_scale_abs_mean": 0.10503824055194855, "signal/advantage_pre_scale_std": 0.18097075819969177, "signal/advantage_std": 0.9833443641662598, "signal/batch_coverage_0/centered_abs_mean": 0.16990404427051545, "signal/batch_coverage_0/group_std_mean": 0.21895833909511567, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.027632010728120805, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0024296278599649666, "signal/batch_coverage_1/centered_abs_mean": 0.16990404427051545, "signal/batch_coverage_1/group_std_mean": 0.21895833909511567, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.027632010728120805, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0024296278599649666, "signal/batch_coverage_10/centered_abs_mean": 0.1890587478876114, "signal/batch_coverage_10/group_std_mean": 0.24543083608150482, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.030750004574656487, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0027035401202738286, "signal/batch_coverage_15/centered_abs_mean": 0.19028227627277375, "signal/batch_coverage_15/group_std_mean": 0.2467927187681198, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.030991899222135542, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002721036598086357, "signal/batch_coverage_20/centered_abs_mean": 0.19405905902385712, "signal/batch_coverage_20/group_std_mean": 0.25165227949619295, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.031617505475878716, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0027750445529818534, "signal/batch_coverage_25/centered_abs_mean": 0.20035355389118195, "signal/batch_coverage_25/group_std_mean": 0.2593338280916214, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.032659551873803136, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0028650558553636072, "signal/batch_coverage_5/centered_abs_mean": 0.1818935126066208, "signal/batch_coverage_5/group_std_mean": 0.23420885503292083, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.029556793347001075, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002601077314466238, "signal/brier_reward/centered_abs_mean": 0.14850833714008332, "signal/brier_reward/group_std_mean": 0.19897804260253907, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.16746804118156433, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01485083345323801, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042133938521146774, "signal/confidence_uniqueness_reward/group_std_mean": 0.08468769639730453, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04754840657114982, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004213394038379193, "signal/format_reward/centered_abs_mean": 0.03225911408662796, "signal/format_reward/group_std_mean": 0.07445819452404975, "signal/format_reward/group_zero_std_frac": 0.6444444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.18054555654525756, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01612955704331398, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33631141781806945, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40545159578323364, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3834416627883911, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03363114222884178, "step": 95 }, { "calibration/aurc": 0.14822290368263727, "calibration/batch_distribution_entropy": 0.9337155038720034, "calibration/buffer_distribution_entropy": 0.9163495073770178, "calibration/confidence_entropy": 0.4845056722594919, "calibration/coverage@0%": 0.045044248599582566, "calibration/coverage@1%": 0.045044248599582566, "calibration/coverage@10%": 0.34236437978774104, "calibration/coverage@15%": 0.6615942316378179, "calibration/coverage@20%": 0.8370272412600379, "calibration/coverage@25%": 0.9098417593922203, "calibration/coverage@30%": 0.9337582321192357, "calibration/coverage@5%": 0.08942943043126889, "calibration/ece": 0.1282733814835266, "calibration/mean_confidence": 0.6290357321007473, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019791666666666673, "completions/max_length": 3316.6, "completions/max_terminated_length": 3316.6, "completions/mean_length": 801.7419311523438, "completions/mean_terminated_length": 817.86298828125, "completions/min_length": 0.0, "completions/min_terminated_length": 274.6, "epoch": 0.23999700003749952, "grad_norm": 0.002241980517283082, "learning_rate": 3.2530120481927713e-06, "loss": -0.0585, "num_tokens": 218981675.0, "reward": 1.0059626221656799, "reward_std": 0.15443489253520964, "rewards/accuracy_reward": 0.675, "rewards/batch_coverage_0": 0.33700525760650635, "rewards/batch_coverage_1": 0.33700525760650635, "rewards/batch_coverage_10": 0.40210442543029784, "rewards/batch_coverage_15": 0.41211907267570497, "rewards/batch_coverage_20": 0.42381772994995115, "rewards/batch_coverage_25": 0.4277949512004852, "rewards/batch_coverage_5": 0.3827322542667389, "rewards/brier_reward": 0.8183276414871216, "rewards/confidence_uniqueness_reward": 0.9244850397109985, "rewards/format_reward": 0.9797743201255799, "rewards/frontier_entropy_batch_reward": -0.34638661742210386, "signal/accuracy_reward/centered_abs_mean": 0.14997829794883727, "signal/accuracy_reward/group_std_mean": 0.20479413270950317, "signal/accuracy_reward/group_zero_std_frac": 0.38333333730697633, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8926918745040894, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07498914897441863, "signal/advantage_abs_mean": 0.69511878490448, "signal/advantage_pre_scale_abs_mean": 0.10425726771354675, "signal/advantage_pre_scale_std": 0.17962579131126405, "signal/advantage_std": 0.9832849621772766, "signal/batch_coverage_0/centered_abs_mean": 0.14159564077854156, "signal/batch_coverage_0/group_std_mean": 0.18152335584163665, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.024330893903970717, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002024817722849548, "signal/batch_coverage_1/centered_abs_mean": 0.14159564077854156, "signal/batch_coverage_1/group_std_mean": 0.18152335584163665, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.024330893903970717, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002024817722849548, "signal/batch_coverage_10/centered_abs_mean": 0.16167503893375396, "signal/batch_coverage_10/group_std_mean": 0.2084078311920166, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.027843480557203294, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002311953017488122, "signal/batch_coverage_15/centered_abs_mean": 0.16399608552455902, "signal/batch_coverage_15/group_std_mean": 0.21192810237407683, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.028209054842591286, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0023451440967619417, "signal/batch_coverage_20/centered_abs_mean": 0.1698843240737915, "signal/batch_coverage_20/group_std_mean": 0.22043935358524322, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.029261283949017523, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0024293458089232446, "signal/batch_coverage_25/centered_abs_mean": 0.17391247749328614, "signal/batch_coverage_25/group_std_mean": 0.22539261877536773, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.029883557930588722, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0024869484361261128, "signal/batch_coverage_5/centered_abs_mean": 0.15491499304771422, "signal/batch_coverage_5/group_std_mean": 0.19899119138717652, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02663789503276348, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022152844350785015, "signal/brier_reward/centered_abs_mean": 0.12495807856321335, "signal/brier_reward/group_std_mean": 0.17128449082374572, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14955242574214936, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01249580793082714, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.042324337363243106, "signal/confidence_uniqueness_reward/group_std_mean": 0.0849258303642273, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.050359965115785596, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0042324337176978585, "signal/format_reward/centered_abs_mean": 0.0322645403444767, "signal/format_reward/group_std_mean": 0.07438525781035424, "signal/format_reward/group_zero_std_frac": 0.6472222447395325, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.1907156676054001, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.01613227017223835, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33729679584503175, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40455949306488037, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4041153252124786, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03372968137264252, "step": 100 }, { "epoch": 0.23999700003749952, "eval_calibration/aurc": 0.1469159218326392, "eval_calibration/batch_distribution_entropy": 0.8635779316733615, "eval_calibration/buffer_distribution_entropy": 0.9188378744179434, "eval_calibration/confidence_entropy": 0.47168668069352604, "eval_calibration/coverage@0%": 0.28410618279569894, "eval_calibration/coverage@1%": 0.28410618279569894, "eval_calibration/coverage@10%": 0.46387768817204295, "eval_calibration/coverage@15%": 0.5174731182795699, "eval_calibration/coverage@20%": 0.7014448924731184, "eval_calibration/coverage@25%": 0.8887768817204301, "eval_calibration/coverage@30%": 0.957997311827957, "eval_calibration/coverage@5%": 0.29485887096774194, "eval_calibration/ece": 0.17558535769489247, "eval_calibration/mean_confidence": 0.6230294628696237, "eval_completions/clipped_ratio": 0.028472222222222215, "eval_completions/max_length": 2266.0, "eval_completions/max_terminated_length": 2266.0, "eval_completions/mean_length": 773.9508870442709, "eval_completions/mean_terminated_length": 796.6577555338541, "eval_completions/min_length": 0.0, "eval_completions/min_terminated_length": 319.0, "eval_loss": 0.0, "eval_num_tokens": 218981675.0, "eval_reward": 0.9049823184808096, "eval_reward_std": 0.27190424005190533, "eval_rewards/accuracy_reward": 0.6814236144224802, "eval_rewards/batch_coverage_0": 0.02894584136083722, "eval_rewards/batch_coverage_1": 0.02894584136083722, "eval_rewards/batch_coverage_10": 0.05388917370388905, "eval_rewards/batch_coverage_15": 0.08727520704269409, "eval_rewards/batch_coverage_20": 0.15597188969453177, "eval_rewards/batch_coverage_25": 0.21343981474637985, "eval_rewards/batch_coverage_5": 0.03746440044293801, "eval_rewards/brier_reward": 0.799796978632609, "eval_rewards/confidence_uniqueness_reward": 0.8673709034919739, "eval_rewards/format_reward": 0.972222218910853, "eval_rewards/frontier_entropy_batch_reward": -0.972222218910853, "eval_runtime": 214.923, "eval_samples_per_second": 4.653, "eval_signal/accuracy_reward/centered_abs_mean": 0.4163954009612401, "eval_signal/accuracy_reward/group_std_mean": 0.46188920736312866, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.784879336754481, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20819770048062006, "eval_signal/advantage_abs_mean": 0.848372608423233, "eval_signal/advantage_pre_scale_abs_mean": 0.23060731341441473, "eval_signal/advantage_pre_scale_std": 0.27091872692108154, "eval_signal/advantage_std": 0.9864526291688284, "eval_signal/batch_coverage_0/centered_abs_mean": 0.15962670495112738, "eval_signal/batch_coverage_0/group_std_mean": 0.2362965246041616, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.008654607847953836, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.0022826618903006115, "eval_signal/batch_coverage_1/centered_abs_mean": 0.15962670495112738, "eval_signal/batch_coverage_1/group_std_mean": 0.2362965246041616, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.008654607847953836, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.0022826618903006115, "eval_signal/batch_coverage_10/centered_abs_mean": 0.13342495014270148, "eval_signal/batch_coverage_10/group_std_mean": 0.1891984591881434, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.007212459032113354, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.0019079767322788637, "eval_signal/batch_coverage_15/centered_abs_mean": 0.13718673835198084, "eval_signal/batch_coverage_15/group_std_mean": 0.18474465360244116, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.007393745006993413, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.0019617703510448337, "eval_signal/batch_coverage_20/centered_abs_mean": 0.18008214980363846, "eval_signal/batch_coverage_20/group_std_mean": 0.22891838351885477, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.009653995279222727, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025751747113342085, "eval_signal/batch_coverage_25/centered_abs_mean": 0.24329387893279394, "eval_signal/batch_coverage_25/group_std_mean": 0.3025979946057002, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.013066323939710855, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0034791024712224803, "eval_signal/batch_coverage_5/centered_abs_mean": 0.1504707137743632, "eval_signal/batch_coverage_5/group_std_mean": 0.21995937327543894, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.008191281541561088, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.002151731246461471, "eval_signal/brier_reward/centered_abs_mean": 0.19535045325756073, "eval_signal/brier_reward/group_std_mean": 0.257330559194088, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07330747569600742, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.019535046070814133, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.07103816606104374, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.12991442531347275, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.026370037036637466, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.007103816761324803, "eval_signal/format_reward/centered_abs_mean": 0.05219184048473835, "eval_signal/format_reward/group_std_mean": 0.119682926684618, "eval_signal/format_reward/group_zero_std_frac": 0.4444444552063942, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.09510742562512557, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.026095920242369175, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.05219184048473835, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.119682926684618, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.4444444552063942, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.019021486553053062, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.005219184250260393, "eval_steps_per_second": 0.028, "step": 100 }, { "calibration/aurc": 0.23941046945136274, "calibration/batch_distribution_entropy": 0.9536971303588615, "calibration/buffer_distribution_entropy": 0.9202938946779904, "calibration/confidence_entropy": 0.4764432138439312, "calibration/coverage@0%": 0.0743582721614013, "calibration/coverage@1%": 0.0743582721614013, "calibration/coverage@10%": 0.2693344966610982, "calibration/coverage@15%": 0.37163524177588975, "calibration/coverage@20%": 0.46363612638705776, "calibration/coverage@25%": 0.5449943955053795, "calibration/coverage@30%": 0.5929579093432007, "calibration/coverage@5%": 0.19338995056617767, "calibration/ece": 0.11256938371168097, "calibration/mean_confidence": 0.5986923518708889, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.03272569444444442, "completions/max_length": 3670.0, "completions/max_terminated_length": 3670.0, "completions/mean_length": 784.5158935546875, "completions/mean_terminated_length": 811.465380859375, "completions/min_length": 0.0, "completions/min_terminated_length": 258.6, "epoch": 0.2519968500393745, "grad_norm": 0.0023560032714158297, "learning_rate": 3.1024096385542172e-06, "loss": -0.1058, "num_tokens": 231096162.0, "reward": 0.9873493909835815, "reward_std": 0.19203925132751465, "rewards/accuracy_reward": 0.6542534589767456, "rewards/batch_coverage_0": 0.3244723677635193, "rewards/batch_coverage_1": 0.3244723677635193, "rewards/batch_coverage_10": 0.3919206619262695, "rewards/batch_coverage_15": 0.39909898638725283, "rewards/batch_coverage_20": 0.4087284207344055, "rewards/batch_coverage_25": 0.41347445249557496, "rewards/batch_coverage_5": 0.369165313243866, "rewards/brier_reward": 0.7991156458854676, "rewards/confidence_uniqueness_reward": 0.9148384690284729, "rewards/format_reward": 0.9669270873069763, "rewards/frontier_entropy_batch_reward": -0.32264376878738404, "signal/accuracy_reward/centered_abs_mean": 0.162060546875, "signal/accuracy_reward/group_std_mean": 0.22576369643211364, "signal/accuracy_reward/group_zero_std_frac": 0.3111111134290695, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.7692596673965454, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0810302734375, "signal/advantage_abs_mean": 0.6655745029449462, "signal/advantage_pre_scale_abs_mean": 0.12335889935493469, "signal/advantage_pre_scale_std": 0.21603835821151735, "signal/advantage_std": 0.9835192799568176, "signal/batch_coverage_0/centered_abs_mean": 0.15007852017879486, "signal/batch_coverage_0/group_std_mean": 0.19404472410678864, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.020451470464468, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021461226744577287, "signal/batch_coverage_1/centered_abs_mean": 0.15007852017879486, "signal/batch_coverage_1/group_std_mean": 0.19404472410678864, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.020451470464468, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021461226744577287, "signal/batch_coverage_10/centered_abs_mean": 0.16576945781707764, "signal/batch_coverage_10/group_std_mean": 0.21533091962337494, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.022668870538473128, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0023705032654106617, "signal/batch_coverage_15/centered_abs_mean": 0.1676239103078842, "signal/batch_coverage_15/group_std_mean": 0.2177862823009491, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.022917681932449342, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002397021884098649, "signal/batch_coverage_20/centered_abs_mean": 0.1729596495628357, "signal/batch_coverage_20/group_std_mean": 0.2248106360435486, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.023749194294214248, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0024733229540288447, "signal/batch_coverage_25/centered_abs_mean": 0.1749100923538208, "signal/batch_coverage_25/group_std_mean": 0.22794493436813354, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.024009492993354798, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0025012143421918155, "signal/batch_coverage_5/centered_abs_mean": 0.16244375109672546, "signal/batch_coverage_5/group_std_mean": 0.2103522926568985, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.02219676934182644, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002322945697233081, "signal/brier_reward/centered_abs_mean": 0.13956743776798247, "signal/brier_reward/group_std_mean": 0.19816478788852693, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.13259952664375305, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013956744223833084, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.06475906372070313, "signal/confidence_uniqueness_reward/group_std_mean": 0.1287354990839958, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.061011632531881334, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.006475906539708376, "signal/format_reward/centered_abs_mean": 0.06034613698720932, "signal/format_reward/group_std_mean": 0.1259764924645424, "signal/format_reward/group_zero_std_frac": 0.45277778506278993, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.28261774182319643, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.03017306849360466, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34150766730308535, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40601075887680055, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.32770722508430483, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03415076658129692, "step": 105 }, { "calibration/aurc": 0.13983259526717134, "calibration/batch_distribution_entropy": 0.9209507980047758, "calibration/buffer_distribution_entropy": 0.9227978962787329, "calibration/confidence_entropy": 0.4329057677807843, "calibration/coverage@0%": 0.09626697385971085, "calibration/coverage@1%": 0.09836121469740719, "calibration/coverage@10%": 0.4027895951617685, "calibration/coverage@15%": 0.6134134086345293, "calibration/coverage@20%": 0.6779665906935597, "calibration/coverage@25%": 0.8846078868218783, "calibration/coverage@30%": 0.9452127659574469, "calibration/coverage@5%": 0.2805005944055738, "calibration/ece": 0.12565381656881222, "calibration/mean_confidence": 0.6210956583945125, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02430555555555556, "completions/max_length": 3419.6, "completions/max_terminated_length": 3419.6, "completions/mean_length": 795.7691772460937, "completions/mean_terminated_length": 815.7261596679688, "completions/min_length": 0.0, "completions/min_terminated_length": 277.4, "epoch": 0.2639967000412495, "grad_norm": 0.002739675110206008, "learning_rate": 2.9518072289156627e-06, "loss": -0.077, "num_tokens": 243371871.0, "reward": 1.0169583320617677, "reward_std": 0.16964165568351747, "rewards/accuracy_reward": 0.7042534828186036, "rewards/batch_coverage_0": 0.3346091389656067, "rewards/batch_coverage_1": 0.3346091389656067, "rewards/batch_coverage_10": 0.40635396242141725, "rewards/batch_coverage_15": 0.4123306512832642, "rewards/batch_coverage_20": 0.41916595697402953, "rewards/batch_coverage_25": 0.42104737758636473, "rewards/batch_coverage_5": 0.37122017741203306, "rewards/brier_reward": 0.8008349299430847, "rewards/confidence_uniqueness_reward": 0.9194580793380738, "rewards/format_reward": 0.9751736164093018, "rewards/frontier_entropy_batch_reward": -0.3338502585887909, "signal/accuracy_reward/centered_abs_mean": 0.1485080301761627, "signal/accuracy_reward/group_std_mean": 0.20822436213493348, "signal/accuracy_reward/group_zero_std_frac": 0.3527777910232544, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8229191660881042, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07425401508808135, "signal/advantage_abs_mean": 0.6725971579551697, "signal/advantage_pre_scale_abs_mean": 0.11048495620489121, "signal/advantage_pre_scale_std": 0.19505227506160736, "signal/advantage_std": 0.983374273777008, "signal/batch_coverage_0/centered_abs_mean": 0.14558544754981995, "signal/batch_coverage_0/group_std_mean": 0.18584783673286437, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.02308763712644577, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002081871870905161, "signal/batch_coverage_1/centered_abs_mean": 0.14558544754981995, "signal/batch_coverage_1/group_std_mean": 0.18584783673286437, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.02308763712644577, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002081871870905161, "signal/batch_coverage_10/centered_abs_mean": 0.1662052035331726, "signal/batch_coverage_10/group_std_mean": 0.2154100239276886, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.02644023597240448, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0023767344187945127, "signal/batch_coverage_15/centered_abs_mean": 0.16461114585399628, "signal/batch_coverage_15/group_std_mean": 0.21437348127365113, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.026267021521925928, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002353939367458224, "signal/batch_coverage_20/centered_abs_mean": 0.16556849777698518, "signal/batch_coverage_20/group_std_mean": 0.21627365350723265, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.026395087689161302, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002367629436776042, "signal/batch_coverage_25/centered_abs_mean": 0.16443374752998352, "signal/batch_coverage_25/group_std_mean": 0.21521584689617157, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.026125259697437286, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0023514027241617443, "signal/batch_coverage_5/centered_abs_mean": 0.15621961951255797, "signal/batch_coverage_5/group_std_mean": 0.2001771241426468, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.024791359528899194, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022339405957609417, "signal/brier_reward/centered_abs_mean": 0.13265540301799775, "signal/brier_reward/group_std_mean": 0.18497947752475738, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.14658557772636413, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013265540637075902, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.05351239144802093, "signal/confidence_uniqueness_reward/group_std_mean": 0.10313453078269959, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.05855522751808166, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.005351239163428545, "signal/format_reward/centered_abs_mean": 0.04409722238779068, "signal/format_reward/group_std_mean": 0.09439656734466553, "signal/format_reward/group_zero_std_frac": 0.5666666805744172, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.2371739089488983, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.02204861119389534, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33227952718734743, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4028955101966858, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.3693257212638855, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03322795443236828, "step": 110 }, { "calibration/aurc": 0.22413314193294318, "calibration/batch_distribution_entropy": 0.9561271819585884, "calibration/buffer_distribution_entropy": 0.9254413068575653, "calibration/confidence_entropy": 0.48204974103909837, "calibration/coverage@0%": 0.02640799142926977, "calibration/coverage@1%": 0.02640799142926977, "calibration/coverage@10%": 0.25870477742214976, "calibration/coverage@15%": 0.4443397401757377, "calibration/coverage@20%": 0.61614655555679, "calibration/coverage@25%": 0.6815468359673653, "calibration/coverage@30%": 0.7126977128538204, "calibration/coverage@5%": 0.10948419227001834, "calibration/ece": 0.16517593758056204, "calibration/mean_confidence": 0.5588173910737515, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01909722222222223, "completions/max_length": 3680.8, "completions/max_terminated_length": 3680.8, "completions/mean_length": 804.56328125, "completions/mean_terminated_length": 820.0994506835938, "completions/min_length": 0.0, "completions/min_terminated_length": 290.6, "epoch": 0.27599655004312446, "grad_norm": 0.0027605455834418535, "learning_rate": 2.8012048192771087e-06, "loss": -0.059, "num_tokens": 255719640.0, "reward": 1.007480835914612, "reward_std": 0.1431771606206894, "rewards/accuracy_reward": 0.6782118082046509, "rewards/batch_coverage_0": 0.3108770728111267, "rewards/batch_coverage_1": 0.3108770728111267, "rewards/batch_coverage_10": 0.3816157579421997, "rewards/batch_coverage_15": 0.39420172572135925, "rewards/batch_coverage_20": 0.4015673458576202, "rewards/batch_coverage_25": 0.40532844662666323, "rewards/batch_coverage_5": 0.35119245052337644, "rewards/brier_reward": 0.7949256420135498, "rewards/confidence_uniqueness_reward": 0.928059709072113, "rewards/format_reward": 0.9809027791023255, "rewards/frontier_entropy_batch_reward": -0.30920955538749695, "signal/accuracy_reward/centered_abs_mean": 0.14369032084941863, "signal/accuracy_reward/group_std_mean": 0.19022368788719177, "signal/accuracy_reward/group_zero_std_frac": 0.45277778506278993, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9364258766174316, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07184516042470931, "signal/advantage_abs_mean": 0.7183647990226746, "signal/advantage_pre_scale_abs_mean": 0.10155642777681351, "signal/advantage_pre_scale_std": 0.17249636948108674, "signal/advantage_std": 0.9831730961799622, "signal/batch_coverage_0/centered_abs_mean": 0.1581096827983856, "signal/batch_coverage_0/group_std_mean": 0.20030168890953065, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0294973898679018, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0022609684616327285, "signal/batch_coverage_1/centered_abs_mean": 0.1581096827983856, "signal/batch_coverage_1/group_std_mean": 0.20030168890953065, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0294973898679018, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0022609684616327285, "signal/batch_coverage_10/centered_abs_mean": 0.179025998711586, "signal/batch_coverage_10/group_std_mean": 0.22901394069194794, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.033418096229434015, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0025600716937333344, "signal/batch_coverage_15/centered_abs_mean": 0.18170326352119445, "signal/batch_coverage_15/group_std_mean": 0.2334654837846756, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03398225046694279, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002598356641829014, "signal/batch_coverage_20/centered_abs_mean": 0.18451933860778807, "signal/batch_coverage_20/group_std_mean": 0.23735863268375396, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03448176868259907, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002638626517727971, "signal/batch_coverage_25/centered_abs_mean": 0.1879439651966095, "signal/batch_coverage_25/group_std_mean": 0.24171611070632934, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03511525765061378, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0026875986717641354, "signal/batch_coverage_5/centered_abs_mean": 0.16925646960735322, "signal/batch_coverage_5/group_std_mean": 0.21534042656421662, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03158362843096256, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024203675333410502, "signal/brier_reward/centered_abs_mean": 0.13752256333827972, "signal/brier_reward/group_std_mean": 0.18067309260368347, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1799521267414093, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.013752257265150548, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.04287937767803669, "signal/confidence_uniqueness_reward/group_std_mean": 0.07394094616174698, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0566208966076374, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004287937888875604, "signal/format_reward/centered_abs_mean": 0.03215060792863369, "signal/format_reward/group_std_mean": 0.06199740841984749, "signal/format_reward/group_zero_std_frac": 0.7361111044883728, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.21110976338386536, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.016075303964316844, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3243951678276062, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3954238653182983, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4267133414745331, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.032439516857266426, "step": 115 }, { "calibration/aurc": 0.21397657408258555, "calibration/batch_distribution_entropy": 0.9258972265984567, "calibration/buffer_distribution_entropy": 0.9280627554882404, "calibration/confidence_entropy": 0.44351669627470247, "calibration/coverage@0%": 0.04573898394481331, "calibration/coverage@1%": 0.04573898394481331, "calibration/coverage@10%": 0.4020745849981918, "calibration/coverage@15%": 0.49198784044610433, "calibration/coverage@20%": 0.5461041566090393, "calibration/coverage@25%": 0.5702239862091493, "calibration/coverage@30%": 0.6633931622763249, "calibration/coverage@5%": 0.13628652176502468, "calibration/ece": 0.134714352205786, "calibration/mean_confidence": 0.6368109718865416, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006423611111111139, "completions/max_length": 3506.6, "completions/max_terminated_length": 3506.6, "completions/mean_length": 806.577001953125, "completions/mean_terminated_length": 811.7610961914063, "completions/min_length": 0.0, "completions/min_terminated_length": 297.2, "epoch": 0.28799640004499943, "grad_norm": 0.002654542215168476, "learning_rate": 2.6506024096385547e-06, "loss": -0.0263, "num_tokens": 268093263.0, "reward": 1.0258622884750366, "reward_std": 0.1160861998796463, "rewards/accuracy_reward": 0.6960069537162781, "rewards/batch_coverage_0": 0.3644671857357025, "rewards/batch_coverage_1": 0.3644671857357025, "rewards/batch_coverage_10": 0.44366928935050964, "rewards/batch_coverage_15": 0.45305138230323794, "rewards/batch_coverage_20": 0.46463318467140197, "rewards/batch_coverage_25": 0.4713035702705383, "rewards/batch_coverage_5": 0.41088991761207583, "rewards/brier_reward": 0.8357924342155456, "rewards/confidence_uniqueness_reward": 0.9336610436439514, "rewards/format_reward": 0.9934895873069763, "rewards/frontier_entropy_batch_reward": -0.38337743282318115, "signal/accuracy_reward/centered_abs_mean": 0.12651909589767457, "signal/accuracy_reward/group_std_mean": 0.17061871588230132, "signal/accuracy_reward/group_zero_std_frac": 0.49722222685813905, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0228377342224122, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06325954794883729, "signal/advantage_abs_mean": 0.7268272519111634, "signal/advantage_pre_scale_abs_mean": 0.08354652673006058, "signal/advantage_pre_scale_std": 0.1430157944560051, "signal/advantage_std": 0.9828723788261413, "signal/batch_coverage_0/centered_abs_mean": 0.15081151723861694, "signal/batch_coverage_0/group_std_mean": 0.1929273337125778, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03486100696027279, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002156604756601155, "signal/batch_coverage_1/centered_abs_mean": 0.15081151723861694, "signal/batch_coverage_1/group_std_mean": 0.1929273337125778, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03486100696027279, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002156604756601155, "signal/batch_coverage_10/centered_abs_mean": 0.17233260571956635, "signal/batch_coverage_10/group_std_mean": 0.22250266671180724, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03987372517585754, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0024643562734127044, "signal/batch_coverage_15/centered_abs_mean": 0.17618072628974915, "signal/batch_coverage_15/group_std_mean": 0.22784018516540527, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040774937719106674, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0025193843990564345, "signal/batch_coverage_20/centered_abs_mean": 0.1772436648607254, "signal/batch_coverage_20/group_std_mean": 0.23021324276924132, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04101964309811592, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025345843750983477, "signal/batch_coverage_25/centered_abs_mean": 0.18081068694591523, "signal/batch_coverage_25/group_std_mean": 0.2347923129796982, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04183421730995178, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0025855927728116513, "signal/batch_coverage_5/centered_abs_mean": 0.16231553256511688, "signal/batch_coverage_5/group_std_mean": 0.2089778631925583, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.037541044503450395, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023211120162159203, "signal/brier_reward/centered_abs_mean": 0.11250930428504943, "signal/brier_reward/group_std_mean": 0.1498480796813965, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18200299441814421, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011250930652022362, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.029111380875110626, "signal/confidence_uniqueness_reward/group_std_mean": 0.04803909808397293, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.047121844440698626, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0029111379291862248, "signal/format_reward/centered_abs_mean": 0.012060546688735485, "signal/format_reward/group_std_mean": 0.027607891708612442, "signal/format_reward/group_zero_std_frac": 0.8694444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09729969501495361, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006030273344367743, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3399507701396942, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40973573327064516, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5499330401420593, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0339950755238533, "step": 120 }, { "calibration/aurc": 0.12505652489525854, "calibration/batch_distribution_entropy": 0.9053102395267109, "calibration/buffer_distribution_entropy": 0.9297300307812222, "calibration/confidence_entropy": 0.4474353663526843, "calibration/coverage@0%": 0.09448594417348308, "calibration/coverage@1%": 0.2533700281595579, "calibration/coverage@10%": 0.537857101230514, "calibration/coverage@15%": 0.6128202678308693, "calibration/coverage@20%": 0.6741103541459968, "calibration/coverage@25%": 0.8841481912099155, "calibration/coverage@30%": 0.9522485533204741, "calibration/coverage@5%": 0.33825546076703933, "calibration/ece": 0.1661430153743542, "calibration/mean_confidence": 0.6058376390647029, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003645833333333304, "completions/max_length": 2546.0, "completions/max_terminated_length": 2546.0, "completions/mean_length": 796.4169311523438, "completions/mean_terminated_length": 799.3395141601562, "completions/min_length": 0.0, "completions/min_terminated_length": 305.6, "epoch": 0.2999962500468744, "grad_norm": 0.002558889100328088, "learning_rate": 2.5e-06, "loss": -0.0092, "num_tokens": 280385650.0, "reward": 1.0349355697631837, "reward_std": 0.11634077429771424, "rewards/accuracy_reward": 0.7077256917953492, "rewards/batch_coverage_0": 0.33946998715400695, "rewards/batch_coverage_1": 0.33946998715400695, "rewards/batch_coverage_10": 0.4023483574390411, "rewards/batch_coverage_15": 0.4080226182937622, "rewards/batch_coverage_20": 0.42021956443786623, "rewards/batch_coverage_25": 0.42603095173835753, "rewards/batch_coverage_5": 0.37814598679542544, "rewards/brier_reward": 0.8199628710746765, "rewards/confidence_uniqueness_reward": 0.9409253120422363, "rewards/format_reward": 0.9963541626930237, "rewards/frontier_entropy_batch_reward": -0.31999213695526124, "signal/accuracy_reward/centered_abs_mean": 0.14109700471162795, "signal/accuracy_reward/group_std_mean": 0.18465364575386048, "signal/accuracy_reward/group_zero_std_frac": 0.4777777850627899, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0281545758247375, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07054850235581397, "signal/advantage_abs_mean": 0.7421541452407837, "signal/advantage_pre_scale_abs_mean": 0.08656396716833115, "signal/advantage_pre_scale_std": 0.13928102254867553, "signal/advantage_std": 0.9830185294151306, "signal/batch_coverage_0/centered_abs_mean": 0.15885628163814544, "signal/batch_coverage_0/group_std_mean": 0.200607630610466, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.033229194954037665, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0022716447710990906, "signal/batch_coverage_1/centered_abs_mean": 0.15885628163814544, "signal/batch_coverage_1/group_std_mean": 0.200607630610466, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.033229194954037665, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0022716447710990906, "signal/batch_coverage_10/centered_abs_mean": 0.17724139988422394, "signal/batch_coverage_10/group_std_mean": 0.22531118392944335, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.037116704136133195, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0025345519650727512, "signal/batch_coverage_15/centered_abs_mean": 0.17550498247146606, "signal/batch_coverage_15/group_std_mean": 0.22393256425857544, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.036869344860315324, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002509721275418997, "signal/batch_coverage_20/centered_abs_mean": 0.1801792323589325, "signal/batch_coverage_20/group_std_mean": 0.23051097393035888, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.037887966632843016, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025765630416572094, "signal/batch_coverage_25/centered_abs_mean": 0.1810545653104782, "signal/batch_coverage_25/group_std_mean": 0.2324410378932953, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03815377466380596, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002589080249890685, "signal/batch_coverage_5/centered_abs_mean": 0.16935512125492097, "signal/batch_coverage_5/group_std_mean": 0.21444514393806458, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0354438565671444, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00242177820764482, "signal/brier_reward/centered_abs_mean": 0.11938721984624863, "signal/brier_reward/group_std_mean": 0.15605800747871398, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17510702908039094, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011938722059130668, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022502795234322547, "signal/confidence_uniqueness_reward/group_std_mean": 0.035726646333932875, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03346846289932728, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022502796724438666, "signal/format_reward/centered_abs_mean": 0.006901041604578495, "signal/format_reward/group_std_mean": 0.016878756508231164, "signal/format_reward/group_zero_std_frac": 0.9166666865348816, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05064408630132675, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0034505208022892475, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3258630931377411, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39527029991149903, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4775148153305054, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0325863104313612, "step": 125 }, { "calibration/aurc": 0.18964223588341061, "calibration/batch_distribution_entropy": 0.8968313820197127, "calibration/buffer_distribution_entropy": 0.9302606312327433, "calibration/confidence_entropy": 0.4109658348973223, "calibration/coverage@0%": 0.029853725752874115, "calibration/coverage@1%": 0.06745163698002817, "calibration/coverage@10%": 0.3061498211511034, "calibration/coverage@15%": 0.4607180967743406, "calibration/coverage@20%": 0.6071381242054323, "calibration/coverage@25%": 0.6690803330006971, "calibration/coverage@30%": 0.792739877608118, "calibration/coverage@5%": 0.12567499897475154, "calibration/ece": 0.12968557612437498, "calibration/mean_confidence": 0.6015370253885455, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.007638888888888906, "completions/max_length": 2966.8, "completions/max_terminated_length": 2966.8, "completions/mean_length": 811.5697998046875, "completions/mean_terminated_length": 817.8347290039062, "completions/min_length": 0.0, "completions/min_terminated_length": 249.2, "epoch": 0.3119961000487494, "grad_norm": 0.002718381118029356, "learning_rate": 2.349397590361446e-06, "loss": -0.0172, "num_tokens": 292859734.0, "reward": 1.009836530685425, "reward_std": 0.12487356960773469, "rewards/accuracy_reward": 0.6718750119209289, "rewards/batch_coverage_0": 0.34405243396759033, "rewards/batch_coverage_1": 0.34405243396759033, "rewards/batch_coverage_10": 0.4103072345256805, "rewards/batch_coverage_15": 0.4188034474849701, "rewards/batch_coverage_20": 0.43137113451957704, "rewards/batch_coverage_25": 0.4350013732910156, "rewards/batch_coverage_5": 0.38362832069396974, "rewards/brier_reward": 0.810641348361969, "rewards/confidence_uniqueness_reward": 0.932453966140747, "rewards/format_reward": 0.9923611044883728, "rewards/frontier_entropy_batch_reward": -0.3616227746009827, "signal/accuracy_reward/centered_abs_mean": 0.15205078423023224, "signal/accuracy_reward/group_std_mean": 0.19650846123695373, "signal/accuracy_reward/group_zero_std_frac": 0.45833333730697634, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.1515358924865722, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07602539211511612, "signal/advantage_abs_mean": 0.7485694050788879, "signal/advantage_pre_scale_abs_mean": 0.09428424239158631, "signal/advantage_pre_scale_std": 0.15209928154945374, "signal/advantage_std": 0.9829629063606262, "signal/batch_coverage_0/centered_abs_mean": 0.15308986306190492, "signal/batch_coverage_0/group_std_mean": 0.19674286246299744, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.033842490240931514, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002189185074530542, "signal/batch_coverage_1/centered_abs_mean": 0.15308986306190492, "signal/batch_coverage_1/group_std_mean": 0.19674286246299744, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.033842490240931514, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002189185074530542, "signal/batch_coverage_10/centered_abs_mean": 0.1709138721227646, "signal/batch_coverage_10/group_std_mean": 0.22127122581005096, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.037541619315743444, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0024440682493150232, "signal/batch_coverage_15/centered_abs_mean": 0.1729219973087311, "signal/batch_coverage_15/group_std_mean": 0.22372837960720063, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03800523579120636, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0024727844633162023, "signal/batch_coverage_20/centered_abs_mean": 0.17704226374626159, "signal/batch_coverage_20/group_std_mean": 0.22976912558078766, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03873509056866169, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025317042600363493, "signal/batch_coverage_25/centered_abs_mean": 0.17734608054161072, "signal/batch_coverage_25/group_std_mean": 0.23120309710502623, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03868867196142674, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0025360488798469305, "signal/batch_coverage_5/centered_abs_mean": 0.1632935345172882, "signal/batch_coverage_5/group_std_mean": 0.2102894514799118, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03598980866372585, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023350975010544063, "signal/brier_reward/centered_abs_mean": 0.1272154778242111, "signal/brier_reward/group_std_mean": 0.16492744982242585, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19245673716068268, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012721548043191432, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.031321781873703006, "signal/confidence_uniqueness_reward/group_std_mean": 0.04696677774190903, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04759872481226921, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00313217812217772, "signal/format_reward/centered_abs_mean": 0.012825520895421504, "signal/format_reward/group_std_mean": 0.024604595825076105, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0938265562057495, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.006412760447710752, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.339335960149765, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4103351831436157, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5164508819580078, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033933597058057784, "step": 130 }, { "calibration/aurc": 0.1719525584030117, "calibration/batch_distribution_entropy": 0.9388767749321543, "calibration/buffer_distribution_entropy": 0.9316017917252024, "calibration/confidence_entropy": 0.4678890728504398, "calibration/coverage@0%": 0.08349651871192341, "calibration/coverage@1%": 0.13143766318537858, "calibration/coverage@10%": 0.4041857049608355, "calibration/coverage@15%": 0.4981723237597911, "calibration/coverage@20%": 0.5843342036553525, "calibration/coverage@25%": 0.6877284595300261, "calibration/coverage@30%": 0.814621409921671, "calibration/coverage@5%": 0.2992629460400348, "calibration/ece": 0.13737020768853353, "calibration/mean_confidence": 0.6075435421809182, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004861111111111094, "completions/max_length": 3164.2, "completions/max_terminated_length": 3164.2, "completions/mean_length": 797.4372436523438, "completions/mean_terminated_length": 801.3845947265625, "completions/min_length": 0.0, "completions/min_terminated_length": 259.6, "epoch": 0.32399595005062437, "grad_norm": 0.002610380295664072, "learning_rate": 2.1987951807228917e-06, "loss": -0.0119, "num_tokens": 305139235.0, "reward": 1.0320541143417359, "reward_std": 0.11483577936887741, "rewards/accuracy_reward": 0.6981770753860473, "rewards/batch_coverage_0": 0.33804223537445066, "rewards/batch_coverage_1": 0.33804223537445066, "rewards/batch_coverage_10": 0.42376949787139895, "rewards/batch_coverage_15": 0.4312718451023102, "rewards/batch_coverage_20": 0.43675391674041747, "rewards/batch_coverage_25": 0.4407239556312561, "rewards/batch_coverage_5": 0.38694225549697875, "rewards/brier_reward": 0.8242491722106934, "rewards/confidence_uniqueness_reward": 0.9419329762458801, "rewards/format_reward": 0.9949652791023255, "rewards/frontier_entropy_batch_reward": -0.3111158788204193, "signal/accuracy_reward/centered_abs_mean": 0.1289333775639534, "signal/accuracy_reward/group_std_mean": 0.1770886391401291, "signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9275030612945556, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0644666887819767, "signal/advantage_abs_mean": 0.73212890625, "signal/advantage_pre_scale_abs_mean": 0.08376801013946533, "signal/advantage_pre_scale_std": 0.1376526966691017, "signal/advantage_std": 0.9830352306365967, "signal/batch_coverage_0/centered_abs_mean": 0.15243115425109863, "signal/batch_coverage_0/group_std_mean": 0.19285766780376434, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.031641974300146106, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021797654684633017, "signal/batch_coverage_1/centered_abs_mean": 0.15243115425109863, "signal/batch_coverage_1/group_std_mean": 0.19285766780376434, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.031641974300146106, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021797654684633017, "signal/batch_coverage_10/centered_abs_mean": 0.1790243446826935, "signal/batch_coverage_10/group_std_mean": 0.2288077563047409, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.037215839698910715, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002560048084706068, "signal/batch_coverage_15/centered_abs_mean": 0.176902437210083, "signal/batch_coverage_15/group_std_mean": 0.2270435243844986, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03675914704799652, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002529704850167036, "signal/batch_coverage_20/centered_abs_mean": 0.17587584555149077, "signal/batch_coverage_20/group_std_mean": 0.2266252100467682, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.036522331088781355, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025150245986878873, "signal/batch_coverage_25/centered_abs_mean": 0.1777946799993515, "signal/batch_coverage_25/group_std_mean": 0.2295131653547287, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03697492778301239, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002542464016005397, "signal/batch_coverage_5/centered_abs_mean": 0.16585546731948853, "signal/batch_coverage_5/group_std_mean": 0.2102999210357666, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03446479067206383, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023717330768704414, "signal/brier_reward/centered_abs_mean": 0.11271385103464127, "signal/brier_reward/group_std_mean": 0.148339182138443, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1631714165210724, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011271385289728642, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023278644308447838, "signal/confidence_uniqueness_reward/group_std_mean": 0.035085957124829294, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03395293578505516, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023278645239770412, "signal/format_reward/centered_abs_mean": 0.008951822761446238, "signal/format_reward/group_std_mean": 0.0176251407712698, "signal/format_reward/group_zero_std_frac": 0.9250000238418579, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06485291570425034, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004475911380723119, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32853866219520567, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39873294830322265, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.47811604142189024, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03285386860370636, "step": 135 }, { "calibration/aurc": 0.14304316351618623, "calibration/batch_distribution_entropy": 0.9370140718828288, "calibration/buffer_distribution_entropy": 0.9367364808679118, "calibration/confidence_entropy": 0.4456631514533238, "calibration/coverage@0%": 0.058948490813648294, "calibration/coverage@1%": 0.058948490813648294, "calibration/coverage@10%": 0.466063812335958, "calibration/coverage@15%": 0.5719939304461943, "calibration/coverage@20%": 0.7536909448818897, "calibration/coverage@25%": 0.8405101706036746, "calibration/coverage@30%": 0.9127050524934383, "calibration/coverage@5%": 0.26188074146981627, "calibration/ece": 0.11045847536540351, "calibration/mean_confidence": 0.622812944901985, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004513888888888884, "completions/max_length": 3191.6, "completions/max_terminated_length": 3191.6, "completions/mean_length": 807.037158203125, "completions/mean_terminated_length": 810.6978637695313, "completions/min_length": 0.0, "completions/min_terminated_length": 296.2, "epoch": 0.33599580005249935, "grad_norm": 0.0026262206956744194, "learning_rate": 2.0481927710843377e-06, "loss": -0.0153, "num_tokens": 317540527.0, "reward": 1.024456834793091, "reward_std": 0.11049772948026657, "rewards/accuracy_reward": 0.6847222208976745, "rewards/batch_coverage_0": 0.35727536082267763, "rewards/batch_coverage_1": 0.35727536082267763, "rewards/batch_coverage_10": 0.43343634605407716, "rewards/batch_coverage_15": 0.44259007573127745, "rewards/batch_coverage_20": 0.44810510873794557, "rewards/batch_coverage_25": 0.4504728734493256, "rewards/batch_coverage_5": 0.39787633419036866, "rewards/brier_reward": 0.8221214413642883, "rewards/confidence_uniqueness_reward": 0.9401463627815246, "rewards/format_reward": 0.9949652910232544, "rewards/frontier_entropy_batch_reward": -0.3289829194545746, "signal/accuracy_reward/centered_abs_mean": 0.1142578125, "signal/accuracy_reward/group_std_mean": 0.1605698734521866, "signal/accuracy_reward/group_zero_std_frac": 0.5055555522441864, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8772634863853455, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05712890625, "signal/advantage_abs_mean": 0.7328930735588074, "signal/advantage_pre_scale_abs_mean": 0.07996267676353455, "signal/advantage_pre_scale_std": 0.13530257642269133, "signal/advantage_std": 0.9829383611679077, "signal/batch_coverage_0/centered_abs_mean": 0.15471593141555787, "signal/batch_coverage_0/group_std_mean": 0.19745731651782988, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0342778779566288, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002212437754496932, "signal/batch_coverage_1/centered_abs_mean": 0.15471593141555787, "signal/batch_coverage_1/group_std_mean": 0.19745731651782988, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0342778779566288, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002212437754496932, "signal/batch_coverage_10/centered_abs_mean": 0.17826486825942994, "signal/batch_coverage_10/group_std_mean": 0.22812734246253968, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03941522017121315, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002549187559634447, "signal/batch_coverage_15/centered_abs_mean": 0.18213868141174316, "signal/batch_coverage_15/group_std_mean": 0.23364066183567048, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04027082771062851, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0026045831851661204, "signal/batch_coverage_20/centered_abs_mean": 0.18287501633167266, "signal/batch_coverage_20/group_std_mean": 0.2352455586194992, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04047075062990189, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002615112857893109, "signal/batch_coverage_25/centered_abs_mean": 0.18394773900508882, "signal/batch_coverage_25/group_std_mean": 0.23635469377040863, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04076088219881058, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0026304526720196007, "signal/batch_coverage_5/centered_abs_mean": 0.1656036376953125, "signal/batch_coverage_5/group_std_mean": 0.2107256144285202, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.036629929393529895, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002368132071569562, "signal/brier_reward/centered_abs_mean": 0.1181584894657135, "signal/brier_reward/group_std_mean": 0.15519708395004272, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18297027945518493, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01181584969162941, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024365550279617308, "signal/confidence_uniqueness_reward/group_std_mean": 0.0377134170383215, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.038112782314419745, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.00243655510712415, "signal/format_reward/centered_abs_mean": 0.008908420242369175, "signal/format_reward/group_std_mean": 0.018860687501728535, "signal/format_reward/group_zero_std_frac": 0.9138889074325561, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0694626085460186, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004454210121184588, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33632318377494813, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40446406602859497, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5234416484832763, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03363232016563415, "step": 140 }, { "calibration/aurc": 0.14992865764971097, "calibration/batch_distribution_entropy": 0.9724585718516892, "calibration/buffer_distribution_entropy": 0.9480821107150399, "calibration/confidence_entropy": 0.4557665676761431, "calibration/coverage@0%": 0.11152017370272649, "calibration/coverage@1%": 0.13131184036939314, "calibration/coverage@10%": 0.4511516051011434, "calibration/coverage@15%": 0.5615957563764291, "calibration/coverage@20%": 0.6935809696569921, "calibration/coverage@25%": 0.8204677880386984, "calibration/coverage@30%": 0.8737851802990326, "calibration/coverage@5%": 0.2761035070360598, "calibration/ece": 0.13730210610572074, "calibration/mean_confidence": 0.5631208144994904, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003645833333333348, "completions/max_length": 3001.8, "completions/max_terminated_length": 3001.8, "completions/mean_length": 838.5323120117188, "completions/mean_terminated_length": 841.6531982421875, "completions/min_length": 60.6, "completions/min_terminated_length": 305.6, "epoch": 0.34799565005437433, "grad_norm": 0.002406596438959241, "learning_rate": 1.8975903614457832e-06, "loss": -0.0085, "num_tokens": 330265027.0, "reward": 1.0442128896713256, "reward_std": 0.10550991892814636, "rewards/accuracy_reward": 0.7213541746139527, "rewards/batch_coverage_0": 0.3693415939807892, "rewards/batch_coverage_1": 0.3693415939807892, "rewards/batch_coverage_10": 0.4359207212924957, "rewards/batch_coverage_15": 0.44635804295539855, "rewards/batch_coverage_20": 0.45086843371391294, "rewards/batch_coverage_25": 0.4531724214553833, "rewards/batch_coverage_5": 0.41727402806282043, "rewards/brier_reward": 0.8248473882675171, "rewards/confidence_uniqueness_reward": 0.9416616916656494, "rewards/format_reward": 0.9961805701255798, "rewards/frontier_entropy_batch_reward": -0.3327996253967285, "signal/accuracy_reward/centered_abs_mean": 0.12060546725988389, "signal/accuracy_reward/group_std_mean": 0.16161861419677734, "signal/accuracy_reward/group_zero_std_frac": 0.522222226858139, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9881903648376464, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06030273362994194, "signal/advantage_abs_mean": 0.7486638188362121, "signal/advantage_pre_scale_abs_mean": 0.07886244356632233, "signal/advantage_pre_scale_std": 0.1308459535241127, "signal/advantage_std": 0.9828376650810242, "signal/batch_coverage_0/centered_abs_mean": 0.16480848789215088, "signal/batch_coverage_0/group_std_mean": 0.20503064393997192, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03875530213117599, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002356761461123824, "signal/batch_coverage_1/centered_abs_mean": 0.16480848789215088, "signal/batch_coverage_1/group_std_mean": 0.20503064393997192, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03875530213117599, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002356761461123824, "signal/batch_coverage_10/centered_abs_mean": 0.1872877836227417, "signal/batch_coverage_10/group_std_mean": 0.23541199266910554, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04408646076917648, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0026782152708619833, "signal/batch_coverage_15/centered_abs_mean": 0.191227462887764, "signal/batch_coverage_15/group_std_mean": 0.24096313416957854, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04502549096941948, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0027345526497811077, "signal/batch_coverage_20/centered_abs_mean": 0.1915127158164978, "signal/batch_coverage_20/group_std_mean": 0.24182832539081572, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04509256184101105, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0027386318426579235, "signal/batch_coverage_25/centered_abs_mean": 0.19325141608715057, "signal/batch_coverage_25/group_std_mean": 0.24404240250587464, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04548835903406143, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0027634951751679183, "signal/batch_coverage_5/centered_abs_mean": 0.18021537065505983, "signal/batch_coverage_5/group_std_mean": 0.2256871372461319, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.04239363595843315, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002577079739421606, "signal/brier_reward/centered_abs_mean": 0.1192615807056427, "signal/brier_reward/group_std_mean": 0.15455422103404998, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19625029265880584, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01192615795880556, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02237374596297741, "signal/confidence_uniqueness_reward/group_std_mean": 0.03215453736484051, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03662349134683609, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022373746149241926, "signal/format_reward/centered_abs_mean": 0.006553819449618459, "signal/format_reward/group_std_mean": 0.012815378420054913, "signal/format_reward/group_zero_std_frac": 0.9444444417953491, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05154884457588196, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0032769097248092293, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34422631859779357, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4102538049221039, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5683587074279786, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.034422630071640016, "step": 145 }, { "calibration/aurc": 0.14924825676357584, "calibration/batch_distribution_entropy": 0.9685756850446536, "calibration/buffer_distribution_entropy": 0.9587597216971032, "calibration/confidence_entropy": 0.46276994094843066, "calibration/coverage@0%": 0.047424412133929346, "calibration/coverage@1%": 0.047424412133929346, "calibration/coverage@10%": 0.37805164489722454, "calibration/coverage@15%": 0.5497069426977248, "calibration/coverage@20%": 0.6994764397905759, "calibration/coverage@25%": 0.8060509380453752, "calibration/coverage@30%": 0.8744328097731238, "calibration/coverage@5%": 0.3264170143967666, "calibration/ece": 0.21123060769717777, "calibration/mean_confidence": 0.5284815132218634, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005642361111111094, "completions/max_length": 3231.2, "completions/max_terminated_length": 3231.2, "completions/mean_length": 979.1048828125, "completions/mean_terminated_length": 984.62783203125, "completions/min_length": 0.0, "completions/min_terminated_length": 304.2, "epoch": 0.3599955000562493, "grad_norm": 0.002073184819892049, "learning_rate": 1.7469879518072292e-06, "loss": -0.0112, "num_tokens": 344654651.0, "reward": 1.03697669506073, "reward_std": 0.11584978252649307, "rewards/accuracy_reward": 0.7082465171813965, "rewards/batch_coverage_0": 0.33286563456058504, "rewards/batch_coverage_1": 0.33286563456058504, "rewards/batch_coverage_10": 0.4007334589958191, "rewards/batch_coverage_15": 0.41015023589134214, "rewards/batch_coverage_20": 0.4160439193248749, "rewards/batch_coverage_25": 0.4204522013664246, "rewards/batch_coverage_5": 0.3733904421329498, "rewards/brier_reward": 0.8169546723365784, "rewards/confidence_uniqueness_reward": 0.9425576090812683, "rewards/format_reward": 0.9940104007720947, "rewards/frontier_entropy_batch_reward": -0.28520033359527586, "signal/accuracy_reward/centered_abs_mean": 0.14129231870174408, "signal/accuracy_reward/group_std_mean": 0.18610043525695802, "signal/accuracy_reward/group_zero_std_frac": 0.4666666746139526, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0182552337646484, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07064615935087204, "signal/advantage_abs_mean": 0.7560475707054138, "signal/advantage_pre_scale_abs_mean": 0.0876899853348732, "signal/advantage_pre_scale_std": 0.13981059342622756, "signal/advantage_std": 0.9830022573471069, "signal/batch_coverage_0/centered_abs_mean": 0.16787650883197786, "signal/batch_coverage_0/group_std_mean": 0.21175734102725982, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.035611017048358916, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0024006341118365526, "signal/batch_coverage_1/centered_abs_mean": 0.16787650883197786, "signal/batch_coverage_1/group_std_mean": 0.21175734102725982, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.035611017048358916, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0024006341118365526, "signal/batch_coverage_10/centered_abs_mean": 0.19135403931140899, "signal/batch_coverage_10/group_std_mean": 0.24295100271701814, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04067305475473404, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0027363628149032595, "signal/batch_coverage_15/centered_abs_mean": 0.19120026230812073, "signal/batch_coverage_15/group_std_mean": 0.2438488245010376, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04066586680710316, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002734163636341691, "signal/batch_coverage_20/centered_abs_mean": 0.19174001216888428, "signal/batch_coverage_20/group_std_mean": 0.2444555252790451, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04067807123064995, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002741882111877203, "signal/batch_coverage_25/centered_abs_mean": 0.193832927942276, "signal/batch_coverage_25/group_std_mean": 0.2474503844976425, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04112314581871033, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002771810814738274, "signal/batch_coverage_5/centered_abs_mean": 0.1800677478313446, "signal/batch_coverage_5/group_std_mean": 0.2272460490465164, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.038200228661298755, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0025749687105417252, "signal/brier_reward/centered_abs_mean": 0.12617065459489823, "signal/brier_reward/group_std_mean": 0.16255717277526854, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18507564663887024, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012617065571248531, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022591123729944228, "signal/confidence_uniqueness_reward/group_std_mean": 0.033147013187408446, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.033954189717769624, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002259112545289099, "signal/format_reward/centered_abs_mean": 0.008675130270421504, "signal/format_reward/group_std_mean": 0.016100356169044972, "signal/format_reward/group_zero_std_frac": 0.9333333492279052, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06509829834103584, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004337565135210752, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32594712972640993, "signal/frontier_entropy_batch_reward/group_std_mean": 0.39480751752853394, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.48510047793388367, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03259471394121647, "step": 150 }, { "epoch": 0.3599955000562493, "eval_calibration/aurc": 0.15099810091529658, "eval_calibration/batch_distribution_entropy": 0.860150571119397, "eval_calibration/buffer_distribution_entropy": 0.9641795024573184, "eval_calibration/confidence_entropy": 0.45122327873025564, "eval_calibration/coverage@0%": 0.17708333333333334, "eval_calibration/coverage@1%": 0.17708333333333334, "eval_calibration/coverage@10%": 0.4791666666666667, "eval_calibration/coverage@15%": 0.6458333333333334, "eval_calibration/coverage@20%": 0.8385416666666666, "eval_calibration/coverage@25%": 0.9270833333333334, "eval_calibration/coverage@30%": 0.9739583333333334, "eval_calibration/coverage@5%": 0.28125, "eval_calibration/ece": 0.2121776145833333, "eval_calibration/mean_confidence": 0.62442065625, "eval_completions/clipped_ratio": 0.00434027777777779, "eval_completions/max_length": 2256.6666666666665, "eval_completions/max_terminated_length": 2256.6666666666665, "eval_completions/mean_length": 999.3628641764323, "eval_completions/mean_terminated_length": 1003.8037719726562, "eval_completions/min_length": 206.66666666666666, "eval_completions/min_terminated_length": 420.8333333333333, "eval_loss": 0.0, "eval_num_tokens": 344654651.0, "eval_reward": 0.9219675163427988, "eval_reward_std": 0.2387667124470075, "eval_rewards/accuracy_reward": 0.6883680621782938, "eval_rewards/batch_coverage_0": 0.04401817545294762, "eval_rewards/batch_coverage_1": 0.04401817545294762, "eval_rewards/batch_coverage_10": 0.06404042275001605, "eval_rewards/batch_coverage_15": 0.10943988462289174, "eval_rewards/batch_coverage_20": 0.15806531036893526, "eval_rewards/batch_coverage_25": 0.2252073884010315, "eval_rewards/batch_coverage_5": 0.05134963368376096, "eval_rewards/brier_reward": 0.81276535987854, "eval_rewards/confidence_uniqueness_reward": 0.8863547345002493, "eval_rewards/format_reward": 0.9947916766007742, "eval_rewards/frontier_entropy_batch_reward": -0.9947916766007742, "eval_runtime": 177.8666, "eval_samples_per_second": 5.622, "eval_signal/accuracy_reward/centered_abs_mean": 0.4130316823720932, "eval_signal/accuracy_reward/group_std_mean": 0.46021030843257904, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8719471295674642, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.2065158411860466, "eval_signal/advantage_abs_mean": 0.8689649800459543, "eval_signal/advantage_pre_scale_abs_mean": 0.20798486719528833, "eval_signal/advantage_pre_scale_std": 0.23649577299753824, "eval_signal/advantage_std": 0.9864074190457662, "eval_signal/batch_coverage_0/centered_abs_mean": 0.204094630976518, "eval_signal/batch_coverage_0/group_std_mean": 0.29198014239470166, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.012345977903654179, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.0029185531505693993, "eval_signal/batch_coverage_1/centered_abs_mean": 0.204094630976518, "eval_signal/batch_coverage_1/group_std_mean": 0.29198014239470166, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.012345977903654179, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.0029185531505693993, "eval_signal/batch_coverage_10/centered_abs_mean": 0.1684000020225843, "eval_signal/batch_coverage_10/group_std_mean": 0.23056225727001825, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.010209185304120183, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.0024081199468734362, "eval_signal/batch_coverage_15/centered_abs_mean": 0.1649662603934606, "eval_signal/batch_coverage_15/group_std_mean": 0.22036516666412354, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.00998325839949151, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.00235901753573368, "eval_signal/batch_coverage_20/centered_abs_mean": 0.1855208824078242, "eval_signal/batch_coverage_20/group_std_mean": 0.23475823799769083, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011228633423646292, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.002652948683438202, "eval_signal/batch_coverage_25/centered_abs_mean": 0.259243165453275, "eval_signal/batch_coverage_25/group_std_mean": 0.31990206241607666, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.015624676054964462, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0037071771221235394, "eval_signal/batch_coverage_5/centered_abs_mean": 0.19574902951717377, "eval_signal/batch_coverage_5/group_std_mean": 0.2785518889625867, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.011863870856662592, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.0027992110699415207, "eval_signal/brier_reward/centered_abs_mean": 0.18697136143843332, "eval_signal/brier_reward/group_std_mean": 0.24710966646671295, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07883285731077194, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.018697135771314304, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04890578364332517, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.06993046589195728, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02052827924489975, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0048905784885088606, "eval_signal/format_reward/centered_abs_mean": 0.010091145678112904, "eval_signal/format_reward/group_std_mean": 0.029462782355646294, "eval_signal/format_reward/group_zero_std_frac": 0.8333333532015482, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.020615727019806702, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.005045572839056452, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.010091145678112904, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.029462782355646294, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333532015482, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004123145559181769, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0010091145522892475, "eval_steps_per_second": 0.034, "step": 150 }, { "calibration/aurc": 0.12354927845408956, "calibration/batch_distribution_entropy": 0.9447950727046628, "calibration/buffer_distribution_entropy": 0.9657919927678869, "calibration/confidence_entropy": 0.4733415813935542, "calibration/coverage@0%": 0.03344089186362829, "calibration/coverage@1%": 0.03344089186362829, "calibration/coverage@10%": 0.5678552001740644, "calibration/coverage@15%": 0.714853949086162, "calibration/coverage@20%": 0.8357838337684944, "calibration/coverage@25%": 0.9004188424717146, "calibration/coverage@30%": 0.9442232375979112, "calibration/coverage@5%": 0.33468212598936475, "calibration/ece": 0.16670257491313864, "calibration/mean_confidence": 0.6146071591544293, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003038194444444442, "completions/max_length": 3241.8, "completions/max_terminated_length": 3241.8, "completions/mean_length": 996.8377685546875, "completions/mean_terminated_length": 999.9290771484375, "completions/min_length": 0.0, "completions/min_terminated_length": 359.8, "epoch": 0.3719953500581243, "grad_norm": 0.0023586146999150515, "learning_rate": 1.5963855421686747e-06, "loss": -0.0001, "num_tokens": 359245934.0, "reward": 1.0534717082977294, "reward_std": 0.11110627800226211, "rewards/accuracy_reward": 0.741406238079071, "rewards/batch_coverage_0": 0.3595211863517761, "rewards/batch_coverage_1": 0.3595211863517761, "rewards/batch_coverage_10": 0.4280384540557861, "rewards/batch_coverage_15": 0.43416666984558105, "rewards/batch_coverage_20": 0.443005895614624, "rewards/batch_coverage_25": 0.44818037152290346, "rewards/batch_coverage_5": 0.40228732824325564, "rewards/brier_reward": 0.8326889395713806, "rewards/confidence_uniqueness_reward": 0.9405958533287049, "rewards/format_reward": 0.9967013955116272, "rewards/frontier_entropy_batch_reward": -0.3401912569999695, "signal/accuracy_reward/centered_abs_mean": 0.13376193642616271, "signal/accuracy_reward/group_std_mean": 0.17467234432697296, "signal/accuracy_reward/group_zero_std_frac": 0.5055555701255798, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0903648853302002, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06688096821308136, "signal/advantage_abs_mean": 0.753935980796814, "signal/advantage_pre_scale_abs_mean": 0.0841080218553543, "signal/advantage_pre_scale_std": 0.13811270892620087, "signal/advantage_std": 0.982841157913208, "signal/batch_coverage_0/centered_abs_mean": 0.15721264779567717, "signal/batch_coverage_0/group_std_mean": 0.19686352014541625, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03701254278421402, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0022481407970190046, "signal/batch_coverage_1/centered_abs_mean": 0.15721264779567717, "signal/batch_coverage_1/group_std_mean": 0.19686352014541625, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03701254278421402, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0022481407970190046, "signal/batch_coverage_10/centered_abs_mean": 0.17821633517742158, "signal/batch_coverage_10/group_std_mean": 0.22452322840690614, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04192670062184334, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0025484934914857147, "signal/batch_coverage_15/centered_abs_mean": 0.1773567318916321, "signal/batch_coverage_15/group_std_mean": 0.22416456639766694, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04177441671490669, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.00253620115108788, "signal/batch_coverage_20/centered_abs_mean": 0.1758658766746521, "signal/batch_coverage_20/group_std_mean": 0.2232010543346405, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.041483426839113234, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025148820132017136, "signal/batch_coverage_25/centered_abs_mean": 0.1810859262943268, "signal/batch_coverage_25/group_std_mean": 0.22983238101005554, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04270444884896278, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002589528728276491, "signal/batch_coverage_5/centered_abs_mean": 0.16989169120788575, "signal/batch_coverage_5/group_std_mean": 0.2133048117160797, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03996897637844086, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.00242945128120482, "signal/brier_reward/centered_abs_mean": 0.11728623360395432, "signal/brier_reward/group_std_mean": 0.152217036485672, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.19326700866222382, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011728623509407043, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022488463297486307, "signal/confidence_uniqueness_reward/group_std_mean": 0.03181953355669975, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03719873316586018, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022488463902845977, "signal/format_reward/centered_abs_mean": 0.005566406203433872, "signal/format_reward/group_std_mean": 0.010888969711959361, "signal/format_reward/group_zero_std_frac": 0.9527777791023254, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04535420574247837, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.002783203101716936, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33973073959350586, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40602503418922425, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5617510795593261, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033973073959350585, "step": 155 }, { "calibration/aurc": 0.10859563453296701, "calibration/batch_distribution_entropy": 0.9128716647986437, "calibration/buffer_distribution_entropy": 0.9675145591680068, "calibration/confidence_entropy": 0.46302861735613277, "calibration/coverage@0%": 0.14967362924281985, "calibration/coverage@1%": 0.18256500217580504, "calibration/coverage@10%": 0.665139670609524, "calibration/coverage@15%": 0.7675148108312609, "calibration/coverage@20%": 0.822568986367634, "calibration/coverage@25%": 0.8676011433597186, "calibration/coverage@30%": 0.8970976253298154, "calibration/coverage@5%": 0.5232171997389033, "calibration/ece": 0.12741486753620287, "calibration/mean_confidence": 0.653657961062989, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00546875, "completions/max_length": 3256.4, "completions/max_terminated_length": 3256.4, "completions/mean_length": 1002.3953002929687, "completions/mean_terminated_length": 1007.9062255859375, "completions/min_length": 0.0, "completions/min_terminated_length": 312.6, "epoch": 0.38399520005999926, "grad_norm": 0.0021716596093028784, "learning_rate": 1.4457831325301204e-06, "loss": -0.0154, "num_tokens": 373880824.0, "reward": 1.01896413564682, "reward_std": 0.11577980667352676, "rewards/accuracy_reward": 0.6807291746139527, "rewards/batch_coverage_0": 0.34664570689201357, "rewards/batch_coverage_1": 0.34664570689201357, "rewards/batch_coverage_10": 0.41527708768844607, "rewards/batch_coverage_15": 0.41911380290985106, "rewards/batch_coverage_20": 0.42773756980895994, "rewards/batch_coverage_25": 0.431087327003479, "rewards/batch_coverage_5": 0.38476539254188535, "rewards/brier_reward": 0.8177592873573303, "rewards/confidence_uniqueness_reward": 0.9373740196228028, "rewards/format_reward": 0.9927083134651185, "rewards/frontier_entropy_batch_reward": -0.32897132635116577, "signal/accuracy_reward/centered_abs_mean": 0.13287760615348815, "signal/accuracy_reward/group_std_mean": 0.17416149377822876, "signal/accuracy_reward/group_zero_std_frac": 0.5083333373069763, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0287292838096618, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06643880307674407, "signal/advantage_abs_mean": 0.7522549510002137, "signal/advantage_pre_scale_abs_mean": 0.08743151724338531, "signal/advantage_pre_scale_std": 0.14282708168029784, "signal/advantage_std": 0.9829399466514588, "signal/batch_coverage_0/centered_abs_mean": 0.15453094840049744, "signal/batch_coverage_0/group_std_mean": 0.19441550970077515, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03416990488767624, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0022097925655543806, "signal/batch_coverage_1/centered_abs_mean": 0.15453094840049744, "signal/batch_coverage_1/group_std_mean": 0.19441550970077515, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03416990488767624, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0022097925655543806, "signal/batch_coverage_10/centered_abs_mean": 0.17795575857162477, "signal/batch_coverage_10/group_std_mean": 0.2252943605184555, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.039376800507307054, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002544767269864678, "signal/batch_coverage_15/centered_abs_mean": 0.17797406911849975, "signal/batch_coverage_15/group_std_mean": 0.22535083889961244, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03939326554536819, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002545029018074274, "signal/batch_coverage_20/centered_abs_mean": 0.18285177052021026, "signal/batch_coverage_20/group_std_mean": 0.23180427253246308, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04048406258225441, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0026147802826017143, "signal/batch_coverage_25/centered_abs_mean": 0.18532668352127074, "signal/batch_coverage_25/group_std_mean": 0.23487534523010253, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0410227507352829, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0026501716580241918, "signal/batch_coverage_5/centered_abs_mean": 0.16579327285289763, "signal/batch_coverage_5/group_std_mean": 0.20903973281383514, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03667203634977341, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023708438500761988, "signal/brier_reward/centered_abs_mean": 0.12218662053346634, "signal/brier_reward/group_std_mean": 0.15716723203659058, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18889405727386474, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.012218662351369858, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02724316492676735, "signal/confidence_uniqueness_reward/group_std_mean": 0.039476413279771805, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.042055241763591766, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027243165764957666, "signal/format_reward/centered_abs_mean": 0.011024305410683156, "signal/format_reward/group_std_mean": 0.01946833319962025, "signal/format_reward/group_zero_std_frac": 0.9222222447395325, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.08364587873220444, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005512152705341578, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3342073380947113, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4018004834651947, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5187286734580994, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03342073522508145, "step": 160 }, { "calibration/aurc": 0.13086824597819938, "calibration/batch_distribution_entropy": 0.9472034847512509, "calibration/buffer_distribution_entropy": 0.9666214722500699, "calibration/confidence_entropy": 0.45481414785057267, "calibration/coverage@0%": 0.04582357782475198, "calibration/coverage@1%": 0.09478191115808532, "calibration/coverage@10%": 0.53772076030555, "calibration/coverage@15%": 0.6519230017685322, "calibration/coverage@20%": 0.780576519934485, "calibration/coverage@25%": 0.8727473524512137, "calibration/coverage@30%": 0.9090184937611407, "calibration/coverage@5%": 0.33352284393052795, "calibration/ece": 0.17592407358849602, "calibration/mean_confidence": 0.5471417070050063, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0050347222222222095, "completions/max_length": 3369.8, "completions/max_terminated_length": 3369.8, "completions/mean_length": 1023.3869873046875, "completions/mean_terminated_length": 1028.52685546875, "completions/min_length": 0.0, "completions/min_terminated_length": 362.0, "epoch": 0.39599505006187424, "grad_norm": 0.002039579441770911, "learning_rate": 1.2951807228915664e-06, "loss": -0.0096, "num_tokens": 388809314.0, "reward": 1.0179754614830017, "reward_std": 0.11339564919471741, "rewards/accuracy_reward": 0.6663194417953491, "rewards/batch_coverage_0": 0.3367995202541351, "rewards/batch_coverage_1": 0.3367995202541351, "rewards/batch_coverage_10": 0.3942675650119781, "rewards/batch_coverage_15": 0.4045478582382202, "rewards/batch_coverage_20": 0.4149549126625061, "rewards/batch_coverage_25": 0.4195962190628052, "rewards/batch_coverage_5": 0.3712313652038574, "rewards/brier_reward": 0.8144054651260376, "rewards/confidence_uniqueness_reward": 0.9440905332565308, "rewards/format_reward": 0.9947048664093018, "rewards/frontier_entropy_batch_reward": -0.266845241189003, "signal/accuracy_reward/centered_abs_mean": 0.13079427033662797, "signal/accuracy_reward/group_std_mean": 0.1758062332868576, "signal/accuracy_reward/group_zero_std_frac": 0.4861111223697662, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9914317846298217, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06539713516831398, "signal/advantage_abs_mean": 0.7486802816390992, "signal/advantage_pre_scale_abs_mean": 0.0846073180437088, "signal/advantage_pre_scale_std": 0.13776987195014953, "signal/advantage_std": 0.9829599499702454, "signal/batch_coverage_0/centered_abs_mean": 0.15715940296649933, "signal/batch_coverage_0/group_std_mean": 0.2002890944480896, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03427259549498558, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002247379417531192, "signal/batch_coverage_1/centered_abs_mean": 0.15715940296649933, "signal/batch_coverage_1/group_std_mean": 0.2002890944480896, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03427259549498558, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002247379417531192, "signal/batch_coverage_10/centered_abs_mean": 0.17147505581378936, "signal/batch_coverage_10/group_std_mean": 0.21927900314331056, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.037464487552642825, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002452093129977584, "signal/batch_coverage_15/centered_abs_mean": 0.1753909856081009, "signal/batch_coverage_15/group_std_mean": 0.22453003227710724, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03826990127563477, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0025080910883843897, "signal/batch_coverage_20/centered_abs_mean": 0.17529043555259705, "signal/batch_coverage_20/group_std_mean": 0.22489323019981383, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03827468380331993, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025066531728953124, "signal/batch_coverage_25/centered_abs_mean": 0.17560572326183319, "signal/batch_coverage_25/group_std_mean": 0.22626294195652008, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0383146844804287, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0025111618917435406, "signal/batch_coverage_5/centered_abs_mean": 0.16724469661712646, "signal/batch_coverage_5/group_std_mean": 0.21355923116207123, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0364637590944767, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023915990255773067, "signal/brier_reward/centered_abs_mean": 0.11954751163721085, "signal/brier_reward/group_std_mean": 0.15770359933376313, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18224007785320281, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011954751610755921, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.022302094474434854, "signal/confidence_uniqueness_reward/group_std_mean": 0.03391831777989864, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03458714000880718, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0022302095079794527, "signal/format_reward/centered_abs_mean": 0.009185112826526166, "signal/format_reward/group_std_mean": 0.017785289883613588, "signal/format_reward/group_zero_std_frac": 0.925000011920929, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0728104680776596, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.004592556413263083, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.31106963753700256, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3822822213172913, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4759585916996002, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.031106964498758317, "step": 165 }, { "calibration/aurc": 0.1460920793342822, "calibration/batch_distribution_entropy": 0.8878700887244488, "calibration/buffer_distribution_entropy": 0.9668655536975838, "calibration/confidence_entropy": 0.43763451430842687, "calibration/coverage@0%": 0.018759519147084423, "calibration/coverage@1%": 0.018759519147084423, "calibration/coverage@10%": 0.42567357138337325, "calibration/coverage@15%": 0.6675902364909806, "calibration/coverage@20%": 0.8197856721949514, "calibration/coverage@25%": 0.8918473680112828, "calibration/coverage@30%": 0.943480103345044, "calibration/coverage@5%": 0.08334285248041776, "calibration/ece": 0.08988107862443054, "calibration/mean_confidence": 0.6858332701331313, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.004947916666666696, "completions/max_length": 3697.6, "completions/max_terminated_length": 3697.6, "completions/mean_length": 1010.5817749023438, "completions/mean_terminated_length": 1015.6004760742187, "completions/min_length": 0.0, "completions/min_terminated_length": 339.6, "epoch": 0.4079949000637492, "grad_norm": 0.0021854499354958534, "learning_rate": 1.1445783132530121e-06, "loss": -0.0112, "num_tokens": 403540400.0, "reward": 1.0409629344940186, "reward_std": 0.110990709066391, "rewards/accuracy_reward": 0.7241319417953491, "rewards/batch_coverage_0": 0.37385630011558535, "rewards/batch_coverage_1": 0.37385630011558535, "rewards/batch_coverage_10": 0.4578577697277069, "rewards/batch_coverage_15": 0.46536206603050234, "rewards/batch_coverage_20": 0.472717946767807, "rewards/batch_coverage_25": 0.47600876092910765, "rewards/batch_coverage_5": 0.42083380818367006, "rewards/brier_reward": 0.8414793729782104, "rewards/confidence_uniqueness_reward": 0.9344589471817016, "rewards/format_reward": 0.9950520753860473, "rewards/frontier_entropy_batch_reward": -0.39701979160308837, "signal/accuracy_reward/centered_abs_mean": 0.1273980036377907, "signal/accuracy_reward/group_std_mean": 0.17125212252140046, "signal/accuracy_reward/group_zero_std_frac": 0.5000000119209289, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0747498750686646, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06369900181889535, "signal/advantage_abs_mean": 0.7358132243156433, "signal/advantage_pre_scale_abs_mean": 0.08221107721328735, "signal/advantage_pre_scale_std": 0.13774741888046266, "signal/advantage_std": 0.9827988147735596, "signal/batch_coverage_0/centered_abs_mean": 0.15270276367664337, "signal/batch_coverage_0/group_std_mean": 0.19141809046268463, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03704690709710121, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021836495259776713, "signal/batch_coverage_1/centered_abs_mean": 0.15270276367664337, "signal/batch_coverage_1/group_std_mean": 0.19141809046268463, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03704690709710121, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021836495259776713, "signal/batch_coverage_10/centered_abs_mean": 0.17446030974388121, "signal/batch_coverage_10/group_std_mean": 0.22139351963996887, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.04217695370316506, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002494782442227006, "signal/batch_coverage_15/centered_abs_mean": 0.17822808623313904, "signal/batch_coverage_15/group_std_mean": 0.2266361564397812, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04310111626982689, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0025486615020781757, "signal/batch_coverage_20/centered_abs_mean": 0.1822631001472473, "signal/batch_coverage_20/group_std_mean": 0.23209832310676576, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.044050132483243944, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0026063622906804087, "signal/batch_coverage_25/centered_abs_mean": 0.1828020066022873, "signal/batch_coverage_25/group_std_mean": 0.2328880548477173, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04419608265161514, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0026140686590224505, "signal/batch_coverage_5/centered_abs_mean": 0.16622574925422667, "signal/batch_coverage_5/group_std_mean": 0.20924559533596038, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.040277618914842606, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002377028251066804, "signal/brier_reward/centered_abs_mean": 0.10948136746883393, "signal/brier_reward/group_std_mean": 0.14314137399196625, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18513674437999725, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010948137007653713, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.026617776975035666, "signal/confidence_uniqueness_reward/group_std_mean": 0.037878865376114845, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04497217386960983, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0026617777068167923, "signal/format_reward/centered_abs_mean": 0.007883029547519982, "signal/format_reward/group_std_mean": 0.014791851490736007, "signal/format_reward/group_zero_std_frac": 0.9361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.0656326726078987, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003941514773759991, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.35060847997665406, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4169707238674164, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5929932951927185, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03506084680557251, "step": 170 }, { "calibration/aurc": 0.0988401164454502, "calibration/batch_distribution_entropy": 0.950985405780948, "calibration/buffer_distribution_entropy": 0.9654078631203739, "calibration/confidence_entropy": 0.46972934712497294, "calibration/coverage@0%": 0.1369407160085553, "calibration/coverage@1%": 0.1495061610347333, "calibration/coverage@10%": 0.5456195683369263, "calibration/coverage@15%": 0.7819692461155509, "calibration/coverage@20%": 0.9106661893478408, "calibration/coverage@25%": 0.9679260661150175, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.32950498555415597, "calibration/ece": 0.18936169514098597, "calibration/mean_confidence": 0.5877690730480742, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00894097222222221, "completions/max_length": 3482.6, "completions/max_terminated_length": 3482.6, "completions/mean_length": 1032.5648803710938, "completions/mean_terminated_length": 1041.8525634765624, "completions/min_length": 0.0, "completions/min_terminated_length": 339.6, "epoch": 0.4199947500656242, "grad_norm": 0.0020933363121002913, "learning_rate": 9.93975903614458e-07, "loss": -0.0201, "num_tokens": 418543515.0, "reward": 1.0426448345184327, "reward_std": 0.11992864906787873, "rewards/accuracy_reward": 0.7254340291023255, "rewards/batch_coverage_0": 0.3316203892230988, "rewards/batch_coverage_1": 0.3316203892230988, "rewards/batch_coverage_10": 0.4126744449138641, "rewards/batch_coverage_15": 0.4222039520740509, "rewards/batch_coverage_20": 0.4333043694496155, "rewards/batch_coverage_25": 0.43859469294548037, "rewards/batch_coverage_5": 0.3796039819717407, "rewards/brier_reward": 0.8254922986030578, "rewards/confidence_uniqueness_reward": 0.9372236371040344, "rewards/format_reward": 0.9900173664093017, "rewards/frontier_entropy_batch_reward": -0.3067204415798187, "signal/accuracy_reward/centered_abs_mean": 0.1384494349360466, "signal/accuracy_reward/group_std_mean": 0.18521187007427214, "signal/accuracy_reward/group_zero_std_frac": 0.4638888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0526963233947755, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.0692247174680233, "signal/advantage_abs_mean": 0.7402390241622925, "signal/advantage_pre_scale_abs_mean": 0.08802802860736847, "signal/advantage_pre_scale_std": 0.14837550222873688, "signal/advantage_std": 0.982962167263031, "signal/batch_coverage_0/centered_abs_mean": 0.15836002230644225, "signal/batch_coverage_0/group_std_mean": 0.19843866527080536, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03455947414040565, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0022645482793450357, "signal/batch_coverage_1/centered_abs_mean": 0.15836002230644225, "signal/batch_coverage_1/group_std_mean": 0.19843866527080536, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03455947414040565, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0022645482793450357, "signal/batch_coverage_10/centered_abs_mean": 0.18111312985420228, "signal/batch_coverage_10/group_std_mean": 0.22880154252052307, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03951704278588295, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002589917741715908, "signal/batch_coverage_15/centered_abs_mean": 0.1822204291820526, "signal/batch_coverage_15/group_std_mean": 0.23108108341693878, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03973933905363083, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002605752134695649, "signal/batch_coverage_20/centered_abs_mean": 0.18789599537849427, "signal/batch_coverage_20/group_std_mean": 0.23864111602306365, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.040997046232223514, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0026869128923863173, "signal/batch_coverage_25/centered_abs_mean": 0.19290711581707, "signal/batch_coverage_25/group_std_mean": 0.24501362144947053, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.0420980766415596, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002758571831509471, "signal/batch_coverage_5/centered_abs_mean": 0.17007304430007936, "signal/batch_coverage_5/group_std_mean": 0.2132638841867447, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.037092185020446776, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024320445023477077, "signal/brier_reward/centered_abs_mean": 0.11827056854963303, "signal/brier_reward/group_std_mean": 0.15516594052314758, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18027611076831818, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011827056482434272, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02871614247560501, "signal/confidence_uniqueness_reward/group_std_mean": 0.043540383130311965, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.044160565733909606, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002871614182367921, "signal/format_reward/centered_abs_mean": 0.01482747383415699, "signal/format_reward/group_std_mean": 0.02657197751104832, "signal/format_reward/group_zero_std_frac": 0.8944444537162781, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.11438467130064964, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007413736917078495, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32246204614639284, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3913597702980042, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.49285311698913575, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03224620372056961, "step": 175 }, { "calibration/aurc": 0.07447960795488509, "calibration/batch_distribution_entropy": 0.9584111436492234, "calibration/buffer_distribution_entropy": 0.9659745360423712, "calibration/confidence_entropy": 0.4698024707118339, "calibration/coverage@0%": 0.08605942025799326, "calibration/coverage@1%": 0.2450125384594614, "calibration/coverage@10%": 0.7343520715607339, "calibration/coverage@15%": 0.8616261686041685, "calibration/coverage@20%": 0.9261610751354826, "calibration/coverage@25%": 0.9570680628272251, "calibration/coverage@30%": 0.9738219895287958, "calibration/coverage@5%": 0.5089204790445596, "calibration/ece": 0.20025433500219494, "calibration/mean_confidence": 0.5928821674128748, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.009027777777777768, "completions/max_length": 3789.4, "completions/max_terminated_length": 3789.4, "completions/mean_length": 999.3691162109375, "completions/mean_terminated_length": 1008.4784057617187, "completions/min_length": 0.0, "completions/min_terminated_length": 367.6, "epoch": 0.4319946000674992, "grad_norm": 0.0020875383634120226, "learning_rate": 8.433734939759036e-07, "loss": -0.0193, "num_tokens": 433156215.0, "reward": 1.0356385707855225, "reward_std": 0.12121915966272354, "rewards/accuracy_reward": 0.7159722328186036, "rewards/batch_coverage_0": 0.3323968589305878, "rewards/batch_coverage_1": 0.3323968589305878, "rewards/batch_coverage_10": 0.390076208114624, "rewards/batch_coverage_15": 0.3979366660118103, "rewards/batch_coverage_20": 0.409983891248703, "rewards/batch_coverage_25": 0.41360154151916506, "rewards/batch_coverage_5": 0.36534075140953065, "rewards/brier_reward": 0.8069019794464112, "rewards/confidence_uniqueness_reward": 0.9386700630187989, "rewards/format_reward": 0.990625, "rewards/frontier_entropy_batch_reward": -0.2999406814575195, "signal/accuracy_reward/centered_abs_mean": 0.13981119692325591, "signal/accuracy_reward/group_std_mean": 0.18070069551467896, "signal/accuracy_reward/group_zero_std_frac": 0.4972222328186035, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0410431146621704, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06990559846162796, "signal/advantage_abs_mean": 0.7573927998542785, "signal/advantage_pre_scale_abs_mean": 0.09159428179264069, "signal/advantage_pre_scale_std": 0.15012040138244628, "signal/advantage_std": 0.9829983353614807, "signal/batch_coverage_0/centered_abs_mean": 0.15274601578712463, "signal/batch_coverage_0/group_std_mean": 0.1947482168674469, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03270627558231354, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002184268040582538, "signal/batch_coverage_1/centered_abs_mean": 0.15274601578712463, "signal/batch_coverage_1/group_std_mean": 0.1947482168674469, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03270627558231354, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002184268040582538, "signal/batch_coverage_10/centered_abs_mean": 0.16684764623641968, "signal/batch_coverage_10/group_std_mean": 0.2129494309425354, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03565559573471546, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0023859212175011634, "signal/batch_coverage_15/centered_abs_mean": 0.17019396722316743, "signal/batch_coverage_15/group_std_mean": 0.2171155631542206, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03635375127196312, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002433773782104254, "signal/batch_coverage_20/centered_abs_mean": 0.17045858800411223, "signal/batch_coverage_20/group_std_mean": 0.21784802675247192, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.036375219374895094, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0024375576991587876, "signal/batch_coverage_25/centered_abs_mean": 0.17278562784194945, "signal/batch_coverage_25/group_std_mean": 0.22063855230808258, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03690861836075783, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002470834506675601, "signal/batch_coverage_5/centered_abs_mean": 0.16150131821632385, "signal/batch_coverage_5/group_std_mean": 0.20543525516986846, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03454604744911194, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023094687378033994, "signal/brier_reward/centered_abs_mean": 0.11990782469511033, "signal/brier_reward/group_std_mean": 0.15653329491615295, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17853015959262847, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011990783363580703, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02739548645913601, "signal/confidence_uniqueness_reward/group_std_mean": 0.043045850843191145, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0408272460103035, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0027395487297326325, "signal/format_reward/centered_abs_mean": 0.01424696184694767, "signal/format_reward/group_std_mean": 0.02710859403014183, "signal/format_reward/group_zero_std_frac": 0.8861111164093017, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.10568692535161972, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.007123480923473835, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32398450970649717, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3915144741535187, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4832557141780853, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03239845186471939, "step": 180 }, { "calibration/aurc": 0.15428302573115638, "calibration/batch_distribution_entropy": 0.9357928315322835, "calibration/buffer_distribution_entropy": 0.9662215895426162, "calibration/confidence_entropy": 0.465163569917188, "calibration/coverage@0%": 0.027238980153342723, "calibration/coverage@1%": 0.027238980153342723, "calibration/coverage@10%": 0.23795636954751256, "calibration/coverage@15%": 0.6718151575126063, "calibration/coverage@20%": 0.751713937740079, "calibration/coverage@25%": 0.9252781413612565, "calibration/coverage@30%": 0.972780322862129, "calibration/coverage@5%": 0.0916245972827669, "calibration/ece": 0.17777518535858378, "calibration/mean_confidence": 0.6189465787988068, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00425347222222221, "completions/max_length": 3489.6, "completions/max_terminated_length": 3489.6, "completions/mean_length": 982.99912109375, "completions/mean_terminated_length": 987.1817993164062, "completions/min_length": 0.0, "completions/min_terminated_length": 304.2, "epoch": 0.44399445006937416, "grad_norm": 0.002235629130154848, "learning_rate": 6.927710843373495e-07, "loss": -0.0098, "num_tokens": 447570413.0, "reward": 1.0250831365585327, "reward_std": 0.1192633032798767, "rewards/accuracy_reward": 0.6859375, "rewards/batch_coverage_0": 0.33424278497695925, "rewards/batch_coverage_1": 0.33424278497695925, "rewards/batch_coverage_10": 0.40887650847435, "rewards/batch_coverage_15": 0.4263133108615875, "rewards/batch_coverage_20": 0.43644038438796995, "rewards/batch_coverage_25": 0.4396053493022919, "rewards/batch_coverage_5": 0.3843961298465729, "rewards/brier_reward": 0.8267574667930603, "rewards/confidence_uniqueness_reward": 0.9412113428115845, "rewards/format_reward": 0.9952256917953491, "rewards/frontier_entropy_batch_reward": -0.31822256445884706, "signal/accuracy_reward/centered_abs_mean": 0.14396701455116273, "signal/accuracy_reward/group_std_mean": 0.1907973110675812, "signal/accuracy_reward/group_zero_std_frac": 0.4527777791023254, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0355172395706176, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.07198350727558137, "signal/advantage_abs_mean": 0.75169757604599, "signal/advantage_pre_scale_abs_mean": 0.08948174864053726, "signal/advantage_pre_scale_std": 0.1425995260477066, "signal/advantage_std": 0.9830433845520019, "signal/batch_coverage_0/centered_abs_mean": 0.15130364298820495, "signal/batch_coverage_0/group_std_mean": 0.19145098626613616, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03131290823221207, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021636419463902713, "signal/batch_coverage_1/centered_abs_mean": 0.15130364298820495, "signal/batch_coverage_1/group_std_mean": 0.19145098626613616, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03131290823221207, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021636419463902713, "signal/batch_coverage_10/centered_abs_mean": 0.1710590809583664, "signal/batch_coverage_10/group_std_mean": 0.21738926470279693, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.0353410042822361, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002446144772693515, "signal/batch_coverage_15/centered_abs_mean": 0.1803262084722519, "signal/batch_coverage_15/group_std_mean": 0.2301064521074295, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03722064346075058, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0025786647107452153, "signal/batch_coverage_20/centered_abs_mean": 0.1807143956422806, "signal/batch_coverage_20/group_std_mean": 0.2311330646276474, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.03731723800301552, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025842157658189533, "signal/batch_coverage_25/centered_abs_mean": 0.1803894132375717, "signal/batch_coverage_25/group_std_mean": 0.23168642818927765, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03726850301027298, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0025795686990022658, "signal/batch_coverage_5/centered_abs_mean": 0.16504639387130737, "signal/batch_coverage_5/group_std_mean": 0.20915851891040801, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03413994573056698, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0023601633962243795, "signal/brier_reward/centered_abs_mean": 0.11856148093938827, "signal/brier_reward/group_std_mean": 0.15411594808101653, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1713525176048279, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011856148391962052, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.024256234616041185, "signal/confidence_uniqueness_reward/group_std_mean": 0.03698706589639187, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03531105667352676, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0024256234988570212, "signal/format_reward/centered_abs_mean": 0.008620876912027597, "signal/format_reward/group_std_mean": 0.018049365282058714, "signal/format_reward/group_zero_std_frac": 0.919444453716278, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.06351001970469952, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0043104384560137985, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3371231436729431, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4051863729953766, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.4865589618682861, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033712317049503324, "step": 185 }, { "calibration/aurc": 0.12534142353476713, "calibration/batch_distribution_entropy": 0.8935035689275217, "calibration/buffer_distribution_entropy": 0.9657430288586395, "calibration/confidence_entropy": 0.44360261796641653, "calibration/coverage@0%": 0.062130862681003, "calibration/coverage@1%": 0.09181836268100299, "calibration/coverage@10%": 0.5136960610219088, "calibration/coverage@15%": 0.5486943991865573, "calibration/coverage@20%": 0.8319114254606881, "calibration/coverage@25%": 0.9504922758920801, "calibration/coverage@30%": 0.9932291666666668, "calibration/coverage@5%": 0.4307086185865148, "calibration/ece": 0.15708990215467875, "calibration/mean_confidence": 0.6694349975699834, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.003038194444444442, "completions/max_length": 3577.2, "completions/max_terminated_length": 3577.2, "completions/mean_length": 973.215625, "completions/mean_terminated_length": 976.1800170898438, "completions/min_length": 0.0, "completions/min_terminated_length": 320.2, "epoch": 0.45599430007124914, "grad_norm": 0.002355813281610608, "learning_rate": 5.421686746987952e-07, "loss": -0.0063, "num_tokens": 461864801.0, "reward": 1.0505224466323853, "reward_std": 0.10988152772188187, "rewards/accuracy_reward": 0.7420138955116272, "rewards/batch_coverage_0": 0.35429942011833193, "rewards/batch_coverage_1": 0.35429942011833193, "rewards/batch_coverage_10": 0.43965146541595457, "rewards/batch_coverage_15": 0.446388041973114, "rewards/batch_coverage_20": 0.4566673099994659, "rewards/batch_coverage_25": 0.4625507116317749, "rewards/batch_coverage_5": 0.40552425384521484, "rewards/brier_reward": 0.8376203894615173, "rewards/confidence_uniqueness_reward": 0.9377862334251403, "rewards/format_reward": 0.9969618082046509, "rewards/frontier_entropy_batch_reward": -0.38253204226493837, "signal/accuracy_reward/centered_abs_mean": 0.13186849057674407, "signal/accuracy_reward/group_std_mean": 0.17384477853775024, "signal/accuracy_reward/group_zero_std_frac": 0.5000000178813935, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 1.0667516231536864, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06593424528837204, "signal/advantage_abs_mean": 0.7478165745735168, "signal/advantage_pre_scale_abs_mean": 0.0822651669383049, "signal/advantage_pre_scale_std": 0.1351626455783844, "signal/advantage_std": 0.9828728079795838, "signal/batch_coverage_0/centered_abs_mean": 0.15362754464149475, "signal/batch_coverage_0/group_std_mean": 0.1938132733106613, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.03567029424011707, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021968737710267305, "signal/batch_coverage_1/centered_abs_mean": 0.15362754464149475, "signal/batch_coverage_1/group_std_mean": 0.1938132733106613, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.03567029424011707, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021968737710267305, "signal/batch_coverage_10/centered_abs_mean": 0.17862621545791627, "signal/batch_coverage_10/group_std_mean": 0.2278638958930969, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.041227002441883084, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.002554354863241315, "signal/batch_coverage_15/centered_abs_mean": 0.17997177839279174, "signal/batch_coverage_15/group_std_mean": 0.22987563014030457, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04153685420751572, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002573596499860287, "signal/batch_coverage_20/centered_abs_mean": 0.18477944731712342, "signal/batch_coverage_20/group_std_mean": 0.23605856001377107, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04267275333404541, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002642346080392599, "signal/batch_coverage_25/centered_abs_mean": 0.18651285767555237, "signal/batch_coverage_25/group_std_mean": 0.23866299986839296, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04308526143431664, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0026671338360756635, "signal/batch_coverage_5/centered_abs_mean": 0.1672343373298645, "signal/batch_coverage_5/group_std_mean": 0.21082105934619905, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03876652270555496, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002391451084986329, "signal/brier_reward/centered_abs_mean": 0.11106197088956833, "signal/brier_reward/group_std_mean": 0.1462969422340393, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17965082824230194, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.01110619716346264, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.023622044920921327, "signal/confidence_uniqueness_reward/group_std_mean": 0.03481765128672123, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03806819692254067, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0023622045759111644, "signal/format_reward/centered_abs_mean": 0.005658637313172221, "signal/format_reward/group_std_mean": 0.012903223745524883, "signal/format_reward/group_zero_std_frac": 0.9388888955116272, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.04507702887058258, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0028293186565861105, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.34954912662506105, "signal/frontier_entropy_batch_reward/group_std_mean": 0.4182551920413971, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5644851803779602, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03495491221547127, "step": 190 }, { "calibration/aurc": 0.15241789162374858, "calibration/batch_distribution_entropy": 0.9546151423561307, "calibration/buffer_distribution_entropy": 0.9645976261126823, "calibration/confidence_entropy": 0.46984322577054083, "calibration/coverage@0%": 0.048566288114876136, "calibration/coverage@1%": 0.13814962144820947, "calibration/coverage@10%": 0.49193909973407013, "calibration/coverage@15%": 0.6008877744998234, "calibration/coverage@20%": 0.6417441775799271, "calibration/coverage@25%": 0.6868293467699973, "calibration/coverage@30%": 0.8711309729378772, "calibration/coverage@5%": 0.38952757420411505, "calibration/ece": 0.18659702684310667, "calibration/mean_confidence": 0.5856564245526877, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.006597222222222232, "completions/max_length": 3366.8, "completions/max_terminated_length": 3366.8, "completions/mean_length": 995.2646850585937, "completions/mean_terminated_length": 1001.8587280273438, "completions/min_length": 0.0, "completions/min_terminated_length": 305.8, "epoch": 0.46799415007312406, "grad_norm": 0.0021477374248206615, "learning_rate": 3.91566265060241e-07, "loss": -0.0182, "num_tokens": 476411114.0, "reward": 1.0307905673980713, "reward_std": 0.10914622098207474, "rewards/accuracy_reward": 0.6957465171813965, "rewards/batch_coverage_0": 0.3612436711788177, "rewards/batch_coverage_1": 0.3612436711788177, "rewards/batch_coverage_10": 0.42941672205924986, "rewards/batch_coverage_15": 0.4396386623382568, "rewards/batch_coverage_20": 0.44845632314682005, "rewards/batch_coverage_25": 0.4531115531921387, "rewards/batch_coverage_5": 0.4056135654449463, "rewards/brier_reward": 0.8242664337158203, "rewards/confidence_uniqueness_reward": 0.9394988298416138, "rewards/format_reward": 0.9933159708976745, "rewards/frontier_entropy_batch_reward": -0.315689817070961, "signal/accuracy_reward/centered_abs_mean": 0.11628146767616272, "signal/accuracy_reward/group_std_mean": 0.15813961178064345, "signal/accuracy_reward/group_zero_std_frac": 0.5277777791023255, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9150802969932557, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05814073383808136, "signal/advantage_abs_mean": 0.7424829006195068, "signal/advantage_pre_scale_abs_mean": 0.08026361167430877, "signal/advantage_pre_scale_std": 0.13525623083114624, "signal/advantage_std": 0.9828639507293702, "signal/batch_coverage_0/centered_abs_mean": 0.1481751650571823, "signal/batch_coverage_0/group_std_mean": 0.1863509714603424, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.034362369775772096, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021189047722145913, "signal/batch_coverage_1/centered_abs_mean": 0.1481751650571823, "signal/batch_coverage_1/group_std_mean": 0.1863509714603424, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.034362369775772096, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021189047722145913, "signal/batch_coverage_10/centered_abs_mean": 0.1680694818496704, "signal/batch_coverage_10/group_std_mean": 0.21355166733264924, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03898909837007523, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0024033935274928807, "signal/batch_coverage_15/centered_abs_mean": 0.17305652499198915, "signal/batch_coverage_15/group_std_mean": 0.22033677697181703, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.040188524127006534, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0024747082497924566, "signal/batch_coverage_20/centered_abs_mean": 0.17707459032535552, "signal/batch_coverage_20/group_std_mean": 0.2258709490299225, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04103988632559776, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025321666151285173, "signal/batch_coverage_25/centered_abs_mean": 0.1785203754901886, "signal/batch_coverage_25/group_std_mean": 0.22794765830039979, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04126601964235306, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002552841370925307, "signal/batch_coverage_5/centered_abs_mean": 0.16053160429000854, "signal/batch_coverage_5/group_std_mean": 0.2028528332710266, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.037285619601607325, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002295601973310113, "signal/brier_reward/centered_abs_mean": 0.11180263459682464, "signal/brier_reward/group_std_mean": 0.1465301349759102, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17802698612213136, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011180263943970203, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.0259183157235384, "signal/confidence_uniqueness_reward/group_std_mean": 0.038206398487091064, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.04249723106622696, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0025918317027390004, "signal/format_reward/centered_abs_mean": 0.011094835121184587, "signal/format_reward/group_std_mean": 0.020084955915808677, "signal/format_reward/group_zero_std_frac": 0.919444465637207, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.09070162996649742, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.005547417560592294, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32764663100242614, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3973482310771942, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5320763945579529, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03276466354727745, "step": 195 }, { "calibration/aurc": 0.12053504170385199, "calibration/batch_distribution_entropy": 0.9277943160312582, "calibration/buffer_distribution_entropy": 0.9649269249133663, "calibration/confidence_entropy": 0.47151812106594904, "calibration/coverage@0%": 0.04792212041884817, "calibration/coverage@1%": 0.04792212041884817, "calibration/coverage@10%": 0.550359947643979, "calibration/coverage@15%": 0.6691781195462478, "calibration/coverage@20%": 0.8827961387434555, "calibration/coverage@25%": 0.9348958333333333, "calibration/coverage@30%": 0.9635416666666667, "calibration/coverage@5%": 0.36483693280977314, "calibration/ece": 0.1565456610274869, "calibration/mean_confidence": 0.6492343007308027, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.005381944444444464, "completions/max_length": 2979.8, "completions/max_terminated_length": 2979.8, "completions/mean_length": 960.2939331054688, "completions/mean_terminated_length": 965.4896240234375, "completions/min_length": 0.0, "completions/min_terminated_length": 326.8, "epoch": 0.47999400007499904, "grad_norm": 0.002257067244499922, "learning_rate": 2.409638554216868e-07, "loss": -0.0133, "num_tokens": 490541508.0, "reward": 1.0378822088241577, "reward_std": 0.10426538735628128, "rewards/accuracy_reward": 0.7078993201255799, "rewards/batch_coverage_0": 0.3699501812458038, "rewards/batch_coverage_1": 0.3699501812458038, "rewards/batch_coverage_10": 0.43272061347961427, "rewards/batch_coverage_15": 0.4414961338043213, "rewards/batch_coverage_20": 0.45003448724746703, "rewards/batch_coverage_25": 0.45256100296974183, "rewards/batch_coverage_5": 0.40719414353370664, "rewards/brier_reward": 0.8301080942153931, "rewards/confidence_uniqueness_reward": 0.9409601330757141, "rewards/format_reward": 0.994531261920929, "rewards/frontier_entropy_batch_reward": -0.32251755595207215, "signal/accuracy_reward/centered_abs_mean": 0.10953233540058135, "signal/accuracy_reward/group_std_mean": 0.15017634779214858, "signal/accuracy_reward/group_zero_std_frac": 0.5555555582046509, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8703036189079285, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.05476616770029068, "signal/advantage_abs_mean": 0.7531775951385498, "signal/advantage_pre_scale_abs_mean": 0.07817077487707139, "signal/advantage_pre_scale_std": 0.12902514189481734, "signal/advantage_std": 0.982884156703949, "signal/batch_coverage_0/centered_abs_mean": 0.15214686691761017, "signal/batch_coverage_0/group_std_mean": 0.19155323207378389, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.034885770082473753, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.002175700105726719, "signal/batch_coverage_1/centered_abs_mean": 0.15214686691761017, "signal/batch_coverage_1/group_std_mean": 0.19155323207378389, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.034885770082473753, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.002175700105726719, "signal/batch_coverage_10/centered_abs_mean": 0.17132368981838225, "signal/batch_coverage_10/group_std_mean": 0.21766560673713684, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.039314419776201245, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.00244992864318192, "signal/batch_coverage_15/centered_abs_mean": 0.174043932557106, "signal/batch_coverage_15/group_std_mean": 0.2217872440814972, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03994949609041214, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.002488828217610717, "signal/batch_coverage_20/centered_abs_mean": 0.17794378995895385, "signal/batch_coverage_20/group_std_mean": 0.22746247053146362, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04086562618613243, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0025445961393415926, "signal/batch_coverage_25/centered_abs_mean": 0.17909466624259948, "signal/batch_coverage_25/group_std_mean": 0.22904041409492493, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04110808596014977, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.0025610537733882664, "signal/batch_coverage_5/centered_abs_mean": 0.16287958323955537, "signal/batch_coverage_5/group_std_mean": 0.20583013594150543, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0373820461332798, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.002329178061336279, "signal/brier_reward/centered_abs_mean": 0.10916592478752137, "signal/brier_reward/group_std_mean": 0.1431722342967987, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.17486326694488524, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.010916592925786972, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02177223116159439, "signal/confidence_uniqueness_reward/group_std_mean": 0.029672250896692277, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.0346968125551939, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0021772230742499233, "signal/format_reward/centered_abs_mean": 0.007644314225763083, "signal/format_reward/group_std_mean": 0.0121083602309227, "signal/format_reward/group_zero_std_frac": 0.9555555701255798, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.05982875004410744, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0038221571128815414, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.32661651968955996, "signal/frontier_entropy_batch_reward/group_std_mean": 0.3946535289287567, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.524556452035904, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03266165219247341, "step": 200 }, { "epoch": 0.47999400007499904, "eval_calibration/aurc": 0.10989090327570772, "eval_calibration/batch_distribution_entropy": 0.9192275429652724, "eval_calibration/buffer_distribution_entropy": 0.9643264329465896, "eval_calibration/confidence_entropy": 0.4607454734710889, "eval_calibration/coverage@0%": 0.296875, "eval_calibration/coverage@1%": 0.296875, "eval_calibration/coverage@10%": 0.6458333333333334, "eval_calibration/coverage@15%": 0.7447916666666666, "eval_calibration/coverage@20%": 0.8385416666666666, "eval_calibration/coverage@25%": 0.9010416666666666, "eval_calibration/coverage@30%": 0.9739583333333334, "eval_calibration/coverage@5%": 0.40625, "eval_calibration/ece": 0.20959003645833332, "eval_calibration/mean_confidence": 0.5790771197916667, "eval_completions/clipped_ratio": 0.006076388888888895, "eval_completions/max_length": 2619.8333333333335, "eval_completions/max_terminated_length": 2619.8333333333335, "eval_completions/mean_length": 977.114247639974, "eval_completions/mean_terminated_length": 983.1600341796875, "eval_completions/min_length": 153.0, "eval_completions/min_terminated_length": 395.3333333333333, "eval_loss": 0.0, "eval_num_tokens": 490541508.0, "eval_reward": 0.9246361056963602, "eval_reward_std": 0.24066531658172607, "eval_rewards/accuracy_reward": 0.6909722288449606, "eval_rewards/batch_coverage_0": 0.04255757279073199, "eval_rewards/batch_coverage_1": 0.04255757279073199, "eval_rewards/batch_coverage_10": 0.07763015168408553, "eval_rewards/batch_coverage_15": 0.119985976566871, "eval_rewards/batch_coverage_20": 0.1694452352821827, "eval_rewards/batch_coverage_25": 0.2227772449453672, "eval_rewards/batch_coverage_5": 0.05227204862361153, "eval_rewards/brier_reward": 0.8235170344511668, "eval_rewards/confidence_uniqueness_reward": 0.8882950445016226, "eval_rewards/format_reward": 0.9939236144224802, "eval_rewards/frontier_entropy_batch_reward": -0.9939236144224802, "eval_runtime": 183.9893, "eval_samples_per_second": 5.435, "eval_signal/accuracy_reward/centered_abs_mean": 0.4173177083333333, "eval_signal/accuracy_reward/group_std_mean": 0.46319297949473065, "eval_signal/accuracy_reward/group_zero_std_frac": 0.0, "eval_signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.8747199972470602, "eval_signal/accuracy_reward/weight": 0.5, "eval_signal/accuracy_reward/weighted_centered_abs_mean": 0.20865885416666666, "eval_signal/advantage_abs_mean": 0.8748045464356741, "eval_signal/advantage_pre_scale_abs_mean": 0.21066379050413767, "eval_signal/advantage_pre_scale_std": 0.2384971926609675, "eval_signal/advantage_std": 0.9864106674989065, "eval_signal/batch_coverage_0/centered_abs_mean": 0.1913293475906054, "eval_signal/batch_coverage_0/group_std_mean": 0.2734084377686183, "eval_signal/batch_coverage_0/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.011484552950908741, "eval_signal/batch_coverage_0/weight": 0.014299999922513962, "eval_signal/batch_coverage_0/weighted_centered_abs_mean": 0.0027360095409676433, "eval_signal/batch_coverage_1/centered_abs_mean": 0.1913293475906054, "eval_signal/batch_coverage_1/group_std_mean": 0.2734084377686183, "eval_signal/batch_coverage_1/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.011484552950908741, "eval_signal/batch_coverage_1/weight": 0.014299999922513962, "eval_signal/batch_coverage_1/weighted_centered_abs_mean": 0.0027360095409676433, "eval_signal/batch_coverage_10/centered_abs_mean": 0.14493575568000475, "eval_signal/batch_coverage_10/group_std_mean": 0.19580343117316565, "eval_signal/batch_coverage_10/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.008692070453738173, "eval_signal/batch_coverage_10/weight": 0.014299999922513962, "eval_signal/batch_coverage_10/weighted_centered_abs_mean": 0.002072581206448376, "eval_signal/batch_coverage_15/centered_abs_mean": 0.15486098205049834, "eval_signal/batch_coverage_15/group_std_mean": 0.19644736250241598, "eval_signal/batch_coverage_15/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.00930007042673727, "eval_signal/batch_coverage_15/weight": 0.014299999922513962, "eval_signal/batch_coverage_15/weighted_centered_abs_mean": 0.002214512108669927, "eval_signal/batch_coverage_20/centered_abs_mean": 0.18681678672631583, "eval_signal/batch_coverage_20/group_std_mean": 0.23144895086685816, "eval_signal/batch_coverage_20/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.011187337494144836, "eval_signal/batch_coverage_20/weight": 0.014299999922513962, "eval_signal/batch_coverage_20/weighted_centered_abs_mean": 0.002671479946002364, "eval_signal/batch_coverage_25/centered_abs_mean": 0.23808404554923376, "eval_signal/batch_coverage_25/group_std_mean": 0.28926754742860794, "eval_signal/batch_coverage_25/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.014209352588901917, "eval_signal/batch_coverage_25/weight": 0.014299999922513962, "eval_signal/batch_coverage_25/weighted_centered_abs_mean": 0.0034046017875274024, "eval_signal/batch_coverage_5/centered_abs_mean": 0.17653791109720865, "eval_signal/batch_coverage_5/group_std_mean": 0.24955658366282782, "eval_signal/batch_coverage_5/group_zero_std_frac": 0.0, "eval_signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.010630226538827022, "eval_signal/batch_coverage_5/weight": 0.014299999922513962, "eval_signal/batch_coverage_5/weighted_centered_abs_mean": 0.002524492136823634, "eval_signal/brier_reward/centered_abs_mean": 0.17701477309068045, "eval_signal/brier_reward/group_std_mean": 0.23732628921667734, "eval_signal/brier_reward/group_zero_std_frac": 0.0, "eval_signal/brier_reward/scaled_weighted_centered_abs_mean": 0.07436835144956906, "eval_signal/brier_reward/weight": 0.10000000149011612, "eval_signal/brier_reward/weighted_centered_abs_mean": 0.017701477278023958, "eval_signal/confidence_uniqueness_reward/centered_abs_mean": 0.04846427279214064, "eval_signal/confidence_uniqueness_reward/group_std_mean": 0.07100162468850613, "eval_signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "eval_signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.020224129781126976, "eval_signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "eval_signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.004846427279214065, "eval_signal/format_reward/centered_abs_mean": 0.011664496424297491, "eval_signal/format_reward/group_std_mean": 0.031383837262789406, "eval_signal/format_reward/group_zero_std_frac": 0.8333333532015482, "eval_signal/format_reward/scaled_weighted_centered_abs_mean": 0.02375806588679552, "eval_signal/format_reward/weight": 0.5, "eval_signal/format_reward/weighted_centered_abs_mean": 0.0058322482121487456, "eval_signal/frontier_entropy_batch_reward/centered_abs_mean": 0.011664496424297491, "eval_signal/frontier_entropy_batch_reward/group_std_mean": 0.031383837262789406, "eval_signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.8333333532015482, "eval_signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.004751613363623619, "eval_signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "eval_signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.0011664496657128136, "eval_steps_per_second": 0.033, "step": 200 }, { "calibration/aurc": 0.13501105600234406, "calibration/batch_distribution_entropy": 0.9435655969085556, "calibration/buffer_distribution_entropy": 0.9648407300747447, "calibration/confidence_entropy": 0.45358109081944986, "calibration/coverage@0%": 0.020366075348682823, "calibration/coverage@1%": 0.020366075348682823, "calibration/coverage@10%": 0.46117268003382444, "calibration/coverage@15%": 0.5934764138336502, "calibration/coverage@20%": 0.8597784983913943, "calibration/coverage@25%": 0.9260988297446133, "calibration/coverage@30%": 0.9656213979196467, "calibration/coverage@5%": 0.19665257230616368, "calibration/ece": 0.14935273101890895, "calibration/mean_confidence": 0.610298141093447, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.00390625, "completions/max_length": 3466.0, "completions/max_terminated_length": 3466.0, "completions/mean_length": 990.8876098632812, "completions/mean_terminated_length": 994.7953857421875, "completions/min_length": 0.0, "completions/min_terminated_length": 312.6, "epoch": 0.491993850076874, "grad_norm": 0.002186190104112029, "learning_rate": 9.036144578313253e-08, "loss": -0.005, "num_tokens": 505022485.0, "reward": 1.0617381572723388, "reward_std": 0.10663122683763504, "rewards/accuracy_reward": 0.7565972208976746, "rewards/batch_coverage_0": 0.370861279964447, "rewards/batch_coverage_1": 0.370861279964447, "rewards/batch_coverage_10": 0.4283327877521515, "rewards/batch_coverage_15": 0.43600705862045286, "rewards/batch_coverage_20": 0.44471372961997985, "rewards/batch_coverage_25": 0.44584651589393615, "rewards/batch_coverage_5": 0.40272694230079653, "rewards/brier_reward": 0.8254562616348267, "rewards/confidence_uniqueness_reward": 0.941547405719757, "rewards/format_reward": 0.9959201335906982, "rewards/frontier_entropy_batch_reward": -0.32681636810302733, "signal/accuracy_reward/centered_abs_mean": 0.12428385466337204, "signal/accuracy_reward/group_std_mean": 0.1633887231349945, "signal/accuracy_reward/group_zero_std_frac": 0.5333333432674408, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9983546733856201, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06214192733168602, "signal/advantage_abs_mean": 0.7620988607406616, "signal/advantage_pre_scale_abs_mean": 0.08042153120040893, "signal/advantage_pre_scale_std": 0.1311565786600113, "signal/advantage_std": 0.9828817129135132, "signal/batch_coverage_0/centered_abs_mean": 0.1498122900724411, "signal/batch_coverage_0/group_std_mean": 0.1872162103652954, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.034457380324602126, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.0021423157304525375, "signal/batch_coverage_1/centered_abs_mean": 0.1498122900724411, "signal/batch_coverage_1/group_std_mean": 0.1872162103652954, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.034457380324602126, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.0021423157304525375, "signal/batch_coverage_10/centered_abs_mean": 0.1685813695192337, "signal/batch_coverage_10/group_std_mean": 0.21188536584377288, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03877582773566246, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.0024107135832309723, "signal/batch_coverage_15/centered_abs_mean": 0.16964525282382964, "signal/batch_coverage_15/group_std_mean": 0.213523331284523, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.03902594596147537, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0024259272031486034, "signal/batch_coverage_20/centered_abs_mean": 0.1723014682531357, "signal/batch_coverage_20/group_std_mean": 0.21752589643001558, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.039649903774261475, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.002463910961523652, "signal/batch_coverage_25/centered_abs_mean": 0.17293465435504912, "signal/batch_coverage_25/group_std_mean": 0.2184917449951172, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.03979569748044014, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002472965605556965, "signal/batch_coverage_5/centered_abs_mean": 0.15981419682502745, "signal/batch_coverage_5/group_std_mean": 0.1999186486005783, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.03676430657505989, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0022853429429233072, "signal/brier_reward/centered_abs_mean": 0.11391685307025909, "signal/brier_reward/group_std_mean": 0.14772746860980987, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.18313678205013276, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011391685344278812, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.02226530760526657, "signal/confidence_uniqueness_reward/group_std_mean": 0.03310300558805466, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.03581138141453266, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.002226530876941979, "signal/format_reward/centered_abs_mean": 0.006971571210306138, "signal/format_reward/group_std_mean": 0.014379321224987507, "signal/format_reward/group_zero_std_frac": 0.9361111283302307, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.056047255359590056, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.003485785605153069, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.3415841281414032, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40630670785903933, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5496011257171631, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.03415841460227966, "step": 205 }, { "calibration/aurc": 0.0754442216982998, "calibration/batch_distribution_entropy": 0.9318504533202431, "calibration/buffer_distribution_entropy": 0.9649572636750735, "calibration/confidence_entropy": 0.46937859993147657, "calibration/coverage@0%": 0.04861111111111111, "calibration/coverage@1%": 0.2829861111111111, "calibration/coverage@10%": 0.7209869995648389, "calibration/coverage@15%": 0.8435097548592979, "calibration/coverage@20%": 0.9209094865100087, "calibration/coverage@25%": 0.9791666666666666, "calibration/coverage@30%": 1.0, "calibration/coverage@5%": 0.583623440673049, "calibration/ece": 0.1433574382728641, "calibration/mean_confidence": 0.6334517717013889, "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0021701388888889137, "completions/max_length": 3671.0, "completions/max_terminated_length": 3671.0, "completions/mean_length": 987.3819783528646, "completions/mean_terminated_length": 989.5008951822916, "completions/min_length": 0.0, "completions/min_terminated_length": 290.6666666666667, "epoch": 0.49919376007799904, "num_tokens": 513713317.0, "reward": 1.0401251713434856, "reward_std": 0.10764590402444203, "rewards/accuracy_reward": 0.7122395833333334, "rewards/batch_coverage_0": 0.3732293943564097, "rewards/batch_coverage_1": 0.3732293943564097, "rewards/batch_coverage_10": 0.4244362413883209, "rewards/batch_coverage_15": 0.4366017282009125, "rewards/batch_coverage_20": 0.44373422861099243, "rewards/batch_coverage_25": 0.4510187606016795, "rewards/batch_coverage_5": 0.39601942896842957, "rewards/brier_reward": 0.8295948306719462, "rewards/confidence_uniqueness_reward": 0.9432157675425211, "rewards/format_reward": 0.9976851940155029, "rewards/frontier_entropy_batch_reward": -0.33563558260599774, "signal/accuracy_reward/centered_abs_mean": 0.12830042093992233, "signal/accuracy_reward/group_std_mean": 0.16989988088607788, "signal/accuracy_reward/group_zero_std_frac": 0.5138888955116272, "signal/accuracy_reward/scaled_weighted_centered_abs_mean": 0.9699827035268148, "signal/accuracy_reward/weight": 0.5, "signal/accuracy_reward/weighted_centered_abs_mean": 0.06415021046996117, "signal/advantage_abs_mean": 0.7430837154388428, "signal/advantage_pre_scale_abs_mean": 0.07963799188534419, "signal/advantage_pre_scale_std": 0.12994669129451117, "signal/advantage_std": 0.9829309582710266, "signal/batch_coverage_0/centered_abs_mean": 0.16159847378730774, "signal/batch_coverage_0/group_std_mean": 0.20468567808469137, "signal/batch_coverage_0/group_zero_std_frac": 0.0, "signal/batch_coverage_0/scaled_weighted_centered_abs_mean": 0.0359560064971447, "signal/batch_coverage_0/weight": 0.014299999922513962, "signal/batch_coverage_0/weighted_centered_abs_mean": 0.00231085818571349, "signal/batch_coverage_1/centered_abs_mean": 0.16159847378730774, "signal/batch_coverage_1/group_std_mean": 0.20468567808469137, "signal/batch_coverage_1/group_zero_std_frac": 0.0, "signal/batch_coverage_1/scaled_weighted_centered_abs_mean": 0.0359560064971447, "signal/batch_coverage_1/weight": 0.014299999922513962, "signal/batch_coverage_1/weighted_centered_abs_mean": 0.00231085818571349, "signal/batch_coverage_10/centered_abs_mean": 0.17450785636901855, "signal/batch_coverage_10/group_std_mean": 0.22163488964239755, "signal/batch_coverage_10/group_zero_std_frac": 0.0, "signal/batch_coverage_10/scaled_weighted_centered_abs_mean": 0.03893564393122991, "signal/batch_coverage_10/weight": 0.014299999922513962, "signal/batch_coverage_10/weighted_centered_abs_mean": 0.00249546238531669, "signal/batch_coverage_15/centered_abs_mean": 0.18060380717118582, "signal/batch_coverage_15/group_std_mean": 0.23038248717784882, "signal/batch_coverage_15/group_zero_std_frac": 0.0, "signal/batch_coverage_15/scaled_weighted_centered_abs_mean": 0.04025426755348841, "signal/batch_coverage_15/weight": 0.014299999922513962, "signal/batch_coverage_15/weighted_centered_abs_mean": 0.0025826343335211277, "signal/batch_coverage_20/centered_abs_mean": 0.18379070361455283, "signal/batch_coverage_20/group_std_mean": 0.23500894010066986, "signal/batch_coverage_20/group_zero_std_frac": 0.0, "signal/batch_coverage_20/scaled_weighted_centered_abs_mean": 0.04088542362054189, "signal/batch_coverage_20/weight": 0.014299999922513962, "signal/batch_coverage_20/weighted_centered_abs_mean": 0.0026282070515056453, "signal/batch_coverage_25/centered_abs_mean": 0.18744313220183054, "signal/batch_coverage_25/group_std_mean": 0.23961820701758066, "signal/batch_coverage_25/group_zero_std_frac": 0.0, "signal/batch_coverage_25/scaled_weighted_centered_abs_mean": 0.04164646069208781, "signal/batch_coverage_25/weight": 0.014299999922513962, "signal/batch_coverage_25/weighted_centered_abs_mean": 0.002680436708033085, "signal/batch_coverage_5/centered_abs_mean": 0.168576513727506, "signal/batch_coverage_5/group_std_mean": 0.2129517843325933, "signal/batch_coverage_5/group_zero_std_frac": 0.0, "signal/batch_coverage_5/scaled_weighted_centered_abs_mean": 0.0375792533159256, "signal/batch_coverage_5/weight": 0.014299999922513962, "signal/batch_coverage_5/weighted_centered_abs_mean": 0.0024106441996991634, "signal/brier_reward/centered_abs_mean": 0.11503704637289047, "signal/brier_reward/group_std_mean": 0.1499533106883367, "signal/brier_reward/group_zero_std_frac": 0.0, "signal/brier_reward/scaled_weighted_centered_abs_mean": 0.1776889761288961, "signal/brier_reward/weight": 0.10000000149011612, "signal/brier_reward/weighted_centered_abs_mean": 0.011503705444435278, "signal/confidence_uniqueness_reward/centered_abs_mean": 0.01900616039832433, "signal/confidence_uniqueness_reward/group_std_mean": 0.030254953851302464, "signal/confidence_uniqueness_reward/group_zero_std_frac": 0.0, "signal/confidence_uniqueness_reward/scaled_weighted_centered_abs_mean": 0.02986216110487779, "signal/confidence_uniqueness_reward/weight": 0.10000000149011612, "signal/confidence_uniqueness_reward/weighted_centered_abs_mean": 0.0019006160631154974, "signal/format_reward/centered_abs_mean": 0.004466869169846177, "signal/format_reward/group_std_mean": 0.012596335262060165, "signal/format_reward/group_zero_std_frac": 0.9305555820465088, "signal/format_reward/scaled_weighted_centered_abs_mean": 0.035761150221029915, "signal/format_reward/weight": 0.5, "signal/format_reward/weighted_centered_abs_mean": 0.0022334345849230886, "signal/frontier_entropy_batch_reward/centered_abs_mean": 0.33474533756573993, "signal/frontier_entropy_batch_reward/group_std_mean": 0.40274082620938617, "signal/frontier_entropy_batch_reward/group_zero_std_frac": 0.0, "signal/frontier_entropy_batch_reward/scaled_weighted_centered_abs_mean": 0.5223466257254282, "signal/frontier_entropy_batch_reward/weight": 0.10000000149011612, "signal/frontier_entropy_batch_reward/weighted_centered_abs_mean": 0.033474533508221306, "step": 208, "total_flos": 0.0, "train_loss": -0.03366587917886276, "train_runtime": 42091.8671, "train_samples_per_second": 0.356, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 208, "num_input_tokens_seen": 513713317, "num_train_epochs": 1, "save_steps": 60, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }